Yurik has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/192273

Change subject: Instrumented and rewrote service logic
......................................................................

Instrumented and rewrote service logic

Change-Id: I34cfbe6ac7b6ea57cc346f280ef4d2b5bf3c4457
---
A config.labs.yaml
M config.yaml
M routes/v1.js
3 files changed, 503 insertions(+), 218 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/graphoid refs/changes/73/192273/1

diff --git a/config.labs.yaml b/config.labs.yaml
new file mode 100644
index 0000000..4540b43
--- /dev/null
+++ b/config.labs.yaml
@@ -0,0 +1,69 @@
+# Info about this config. Used for packaging & other purposes.
+info:
+  name: service-template-node
+  version: 0.0.1
+  description: A blueprint for MediaWiki REST API services
+
+# Number of worker processes to spawn.
+# Set to 0 to run everything in a single process without clustering.
+# Use 'ncpu' to run as many workers as there are CPU units
+num_workers: 0
+
+# Logger info
+logging:
+  level: trace
+#  streams:
+#  # Use gelf-stream -> logstash
+#  - type: gelf
+#    host: logstash1003.eqiad.wmnet
+#    port: 12201
+
+# Statsd metrics reporter
+metrics:
+  type: txstatsd
+  host: localhost
+  port: 8125
+
+services:
+  - name: service-template-node
+    # a relative path or the name of an npm package, if different from name
+    module: ./app.js
+    # optionally, a version constraint of the npm package
+    # version: ^0.4.0
+    # per-service config
+    conf:
+      port: 19000
+      # interface: localhost # uncomment to only listen on localhost
+      # compressionLevel: 3 # output gzip compression level, 6 by default
+      # more per-service config settings
+
+      # Graphoid-specific settings
+      # List of domains the service is allowed to access (with their subdomains)
+      domains:
+        - localhost
+        - 127.0.0.1
+        - wmflabs.org
+        - mediawiki.org
+        - wikibooks.org
+        - wikidata.org
+        - wikimedia.org
+        - wikimediafoundation.org
+        - wikinews.org
+        - wikipedia.org
+        - wikiquote.org
+        - wikisource.org
+        - wikiversity.org
+        - wikivoyage.org
+        - wiktionary.org
+      # Additional list of allowed domains with their replacements
+      # Even though the subdomains will also be allowed, the substitution
+      # will be done only on fully matched hostname.
+      # For example, it allows "graph.wmflabs.org" to be replaced with "localhost:12345"
+      domainMap:
+        zero.wmflabs.org: localhost
+        graphoid.wmflabs.org: localhost
+
+      # Which protocol the service should use for protocol-relative URLs like //example.com/path
+      defaultProtocol: http
+      # Error if it takes longer than this to handle the request
+      timeout: 10000
diff --git a/config.yaml b/config.yaml
index c11eec8..ef3b865 120000
--- a/config.yaml
+++ b/config.yaml
@@ -1 +1,66 @@
-config.dev.yaml
\ No newline at end of file
+# Info about this config. Used for packaging & other purposes.
+info:
+  name: service-template-node
+  version: 0.0.1
+  description: A blueprint for MediaWiki REST API services
+
+# Number of worker processes to spawn.
+# Set to 0 to run everything in a single process without clustering.
+# Use 'ncpu' to run as many workers as there are CPU units
+num_workers: 0
+
+# Logger info
+logging:
+  level: trace
+#  streams:
+#  # Use gelf-stream -> logstash
+#  - type: gelf
+#    host: logstash1003.eqiad.wmnet
+#    port: 12201
+
+# Statsd metrics reporter
+metrics:
+  type: txstatsd
+  host: localhost
+  port: 8125
+
+services:
+  - name: service-template-node
+    # a relative path or the name of an npm package, if different from name
+    module: ./app.js
+    # optionally, a version constraint of the npm package
+    # version: ^0.4.0
+    # per-service config
+    conf:
+      port: 6930
+      # interface: localhost # uncomment to only listen on localhost
+      # compressionLevel: 3 # output gzip compression level, 6 by default
+      # more per-service config settings
+
+      # Graphoid-specific settings
+      # List of domains the service is allowed to access (with their subdomains)
+      domains:
+        - localhost
+        - 127.0.0.1
+        - wmflabs.org
+        - mediawiki.org
+        - wikibooks.org
+        - wikidata.org
+        - wikimedia.org
+        - wikimediafoundation.org
+        - wikinews.org
+        - wikipedia.org
+        - wikiquote.org
+        - wikisource.org
+        - wikiversity.org
+        - wikivoyage.org
+        - wiktionary.org
+      # Additional list of allowed domains with their replacements
+      # Even though the subdomains will also be allowed, the substitution
+      # will be done only on fully matched hostname.
+      # For example, it allows "graph.wmflabs.org" to be replaced with "localhost:12345"
+      domainMap: {}
+      # Which protocol the service should use for protocol-relative URLs like //example.com/path
+      defaultProtocol: http
+      # Error if it takes longer than this to handle the request
+      timeout: -1
diff --git a/routes/v1.js b/routes/v1.js
index f416ea2..99fbc65 100644
--- a/routes/v1.js
+++ b/routes/v1.js
@@ -3,15 +3,18 @@
 var express = require('express'),
     preq = require('preq'),
     Promise = require('bluebird'),
+    urllib = require('url'),
     vega = null; // Visualization grammar - https://github.com/trifacta/vega
 
-try{
-    // Simplify debugging when vega is not available
-    vega = require('vega');
-} catch(err) {
-    console.error(err);
-}
+/**
+ * Main log function
+ */
+var log;
 
+/**
+ * Metrics object
+ */
+var metrics;
 
 /**
  * The main router object
@@ -19,14 +22,9 @@
 var router = express.Router();
 
 /**
- * A list of allowed hosts
- */
-var domains = [];
-
-/**
  * A set of 'oldHost' => 'newHost' mappings
  */
-var domainMap = {};
+var domainMap = false;
 
 /**
  * For protocol-relative URLs  (they begin with //), which protocol should we 
use
@@ -39,14 +37,32 @@
 var timeout = 10000;
 
 /**
- * Regex to validate server parameter
+ * Regex to validate host parameter
  */
 var serverRe = null;
 
-function init(conf) {
 
-    domains = conf.domains || domains;
-    domainMap = conf.domainMap || domainMap;
+function init(app) {
+
+    // The very first operation should set up our logger
+    log = app.logger.log.bind(app.logger);
+    metrics = app.metrics;
+
+    // Uncomment to console.log metrics calls
+    //metrics = wrapMetrics(app.metrics);
+
+    log('info/init', 'starting v1' );
+    metrics.increment('v1.init');
+
+    try{
+        // Simplify debugging when vega is not available
+        vega = require('vega');
+    } catch(err) {
+        log('fatal/vega', err);
+    }
+
+    var conf = app.conf;
+    var domains = conf.domains || domains;
     timeout = conf.timeout || timeout;
     defaultProtocol = conf.defaultProtocol || defaultProtocol;
     if (!defaultProtocol.endsWith(':')) {
@@ -54,38 +70,335 @@
         defaultProtocol = defaultProtocol + ':';
     }
 
-    var validDomains = domains.concat(Object.getOwnPropertyNames(domainMap));
+    var validDomains = domains;
+    if (conf.domainMap && Object.getOwnPropertyNames(domainMap) > 0) {
+        domainMap = conf.domainMap;
+        validDomains = 
validDomains.concat(Object.getOwnPropertyNames(domainMap))
+    }
 
     if (validDomains.length == 0) {
-        console.error('Config must have non-empty "domains" (list) and/or 
"domainMap" (dict)');
+        log('fatal/config', 'Config must have non-empty "domains" (list) 
and/or "domainMap" (dict)');
         process.exit(1);
     }
 
     serverRe = new RegExp('^([-a-z0-9]+\\.)?(m\\.|zero\\.)?(' + 
validDomains.join('|') + ')$');
-
-    if (vega) {
-        vega.config.domainWhiteList = domains;
-        vega.config.defaultProtocol = defaultProtocol;
-        vega.config.safeMode = true;
-        if (Object.getOwnPropertyNames(domainMap) > 0) {
-            var originalSanitize = vega.data.load.sanitizeUrl;
-            vega.data.load.sanitizeUrl = function(url) {
-                url = originalSanitize(url);
-                if (url) {
-                    url = url.replace(/^(https?:\/\/)([-a-z0-9.]+)/, 
function(match, prot, host){
-                        var repl = domainMap[host];
-                        return repl ? prot + repl : match;
-                    });
-                }
-                return url;
-            };
-        }
-    }
+    initVega(domains);
 }
+
+
+/**
+ * Init vega rendering
+ * @param domains array of strings - which domains are valid
+ */
+function initVega(domains) {
+    if (!vega) {
+        return;
+    }
+    vega.config.domainWhiteList = domains;
+    vega.config.defaultProtocol = defaultProtocol;
+    vega.config.safeMode = true;
+
+    //
+    // TODO/BUG:  In multithreaded env, we cannot set global vega.config var
+    // while handling multiple requests from multiple hosts.
+    // Until vega is capable of per-rendering context, we must bail on any
+    // relative (no hostname) data or image URLs.
+    //
+    // Do not set vega.config.baseURL. Current sanitizer implementation will fail
+    // because of the missing protocol (safeMode == true). Still, let's double-check
+    // here, in case the user has 'http:pathname', which for some strange reason is
+    // parsed as correct by url lib.
+    //
+    var originalSanitize = vega.data.load.sanitizeUrl;
+    vega.data.load.sanitizeUrl = function (urlOrig) {
+        url = originalSanitize(urlOrig);
+        if (url) {
+            var parts = urllib.parse(url);
+            if (!parts.protocol || !parts.hostname) {
+                url = null;
+            }
+        }
+        if (url && domainMap) {
+            url = url.replace(/^(https?:\/\/)([^#?\/]+)/, function (match, 
prot, host) {
+                var repl = domainMap[host];
+                return repl ? prot + repl : match;
+            });
+        }
+
+        if (!url) {
+            log('info/url-deny', urlOrig);
+        } else if (urlOrig !== url) {
+            log('info/url-fix', {'req': urlOrig, 'repl': url});
+        } else {
+            log('info/url-ok', urlOrig);
+        }
+        return url;
+    };
+}
+
+
+/**
+ * Parse and validate request parameters
+ */
+function validateRequest(state) {
+
+    var start = Date.now();
+
+    var p = state.request.params,
+        host = p.host,
+        title = p.title,
+        revid = p.revid,
+        id = p.id;
+
+    state.log = p; // log all parameters of the request
+
+    state.apiRequest = {
+        format: 'json',
+        action: 'query',
+        prop: 'pageprops',
+        ppprop: 'graph_specs',
+        continue: ''
+    };
+
+    if (revid) {
+        if (!/^[0-9]+$/.test(revid)) {
+            // must be a non-negative integer
+            throw new Err('info/param-revid', 'req.revid');
+        }
+        revid = parseInt(revid);
+    }
+
+    if (revid) {
+        state.apiRequest.revids = revid;
+    } else if (title) {
+        if (title.indexOf('|') > -1) {
+            throw new Err('info/param-title', 'req.title');
+        }
+        state.apiRequest.titles = title;
+    } else {
+        throw new Err('info/param-page', 'req.page');
+    }
+
+    if (!/^[0-9a-f]+$/.test(id)) {
+        throw new Err('info/param-id', 'req.id');
+    }
+    state.graphId = id;
+
+    var parts = serverRe.exec(host);
+    if (!parts) {
+        throw new Err('info/param-host', 'req.host');
+    }
+    // Remove optional part #2 from host (makes m. links appear as desktop to optimize cache)
+    // 1  2 3
+    // en.m.wikipedia.org
+    var host2 = parts[3];
+    if (parts[1]) {
+        host2 = parts[1] + host2;
+    }
+    host2 = (domainMap && domainMap[host2]) || host2;
+
+    state.host = host2;
+    state.apiUrl = defaultProtocol + '//' + host2 + '/w/api.php';
+    if (host !== host2) {
+        state.log.backend = host2;
+    }
+
+    metrics.endTiming('req.time', start);
+
+    return state;
+}
+
+/**
+ * Retrieve graph specifications from the host
+ * @param state is the object with the current state of the request processing
+ */
+function downloadGraphDef(state) {
+
+    var startDefDownload = Date.now();
+    state.log.calls = [];
+
+    // http://stackoverflow.com/questions/24660096/correct-way-to-write-loops-for-promise
+    var loopAsync = Promise.method(function (action, condition, value) {
+        var req = condition(value);
+        if (req) {
+            return action(req).then(loopAsync.bind(null, action, condition));
+        }
+    });
+
+    return loopAsync(function (req) {
+
+        var startApiReq = Date.now();
+        state.log.calls.push(req);
+        var requestOpts = {
+            uri: state.apiUrl,
+            query: req,
+            headers: {'User-Agent': 'graph.ext backend (yurik at wikimedia)'}
+        };
+        return preq(requestOpts)
+            .then(function (resp) {
+                metrics.endTiming('host.time', startApiReq);
+                return resp;
+            });
+
+    }, function (apiRes) {
+
+        // If first run, always allow
+        if (!apiRes) {
+            return state.apiRequest;
+        }
+
+        if (apiRes.status !== 200) {
+            state.log.apiRetStatus = apiRes.status;
+            throw new Err('error/host-status', 'host.status');
+        }
+
+        var res = apiRes.body;
+        if (res.hasOwnProperty('error')) {
+            state.log.apiRetError = res.error;
+            throw new Err('error/host-error', 'host.error');
+        }
+
+        if (res.hasOwnProperty('warnings')) {
+            state.log.apiWarning = res.warnings;
+            log('warn/host-warning', state.log);
+            // Warnings are usually safe to continue
+        }
+
+        if (res.hasOwnProperty('query') && res.query.hasOwnProperty('pages')) {
+            var pages = res.query.pages,
+                graphData = null;
+
+            Object.getOwnPropertyNames(pages).some(function (k) {
+                var page = pages[k];
+                if (page.hasOwnProperty('pageprops') && 
page.pageprops.hasOwnProperty('graph_specs')) {
+                    var gs = JSON.parse(page.pageprops.graph_specs);
+                    if (gs.hasOwnProperty(state.graphId)) {
+                        graphData = gs[state.graphId];
+                        return true;
+                    }
+                }
+                return false;
+            });
+
+            if (graphData) {
+                state.graphData = graphData;
+                return false; // found needed result
+            }
+        }
+        if (res.hasOwnProperty('continue')) {
+            return merge(state.apiRequest, res.continue);
+        }
+        throw new Err('info/host-no-graph', 'host.no-graph');
+
+    }).then(function () {
+        metrics.endTiming('host.total', startDefDownload);
+        return state;
+    });
+}
+
+function renderOnCanvas(state) {
+    return new Promise(function (fulfill, reject){
+        if (!vega) {
+            // If vega is down, keep reporting it
+            throw new Err('fatal/vega', 'vega.missing');
+        }
+
+        var start = Date.now();
+
+        // BUG: see comment above at vega.data.load.sanitizeUrl = ...
+        // In case of non-absolute URLs, use requesting host as "local"
+        //vega.config.baseURL = defaultProtocol + '//' + state.host;
+
+        vega.headless.render({spec: state.graphData, renderer: 'canvas'}, 
function (err, result) {
+            if (err) {
+                state.log.vegaerr = err;
+                reject(new Err('error/vega', 'vega.error'));
+            } else {
+                var stream = result.canvas.pngStream();
+                state.response.status(200).type('png');
+                stream.on('data', function (chunk) {
+                    state.response.write(chunk);
+                });
+                stream.on('end', function () {
+                    state.response.end();
+                    metrics.endTiming('vega.time', start);
+                    fulfill(state);
+                });
+            }
+        });
+    });
+}
+
+/**
+ * Main entry point for graphoid
+ */
+router.get('/:host/:title/:revid/:id.png', function(req, res) {
+
+    var start = Date.now();
+    var state = {request: req, response: res};
+
+    var render = Promise
+        .resolve(state)
+        .then(validateRequest)
+        .then(downloadGraphDef)
+        .then(renderOnCanvas);
+
+    failOnTimeout(render, timeout)
+        .then(function () {
+
+            // SUCCESS
+            // For now, record everything, but soon we should scale it back
+            log('info/ok', state.log);
+            metrics.endTiming('total.time', start);
+
+        },function (reason) {
+
+            // FAILURE
+            var l = state.log;
+            var msg = 'error/unknown',
+                mx = 'error.unknown';
+
+            if (reason instanceof Err) {
+                l = merge(reason, l);
+                msg = reason.message;
+                mx = reason.metrics;
+                delete l.message;
+                delete l.metrics;
+            } else if (reason !== null && typeof reason === 'object') {
+                l = merge(reason, l);
+            } else {
+                l.exception = reason;
+            }
+
+            res.status(400).json(msg);
+            metrics.increment(mx);
+            log(msg, l);
+        });
+});
+
+
+module.exports = function(app) {
+
+    init(app);
+
+    return {
+        path: '/v1',
+        router: router
+    };
+};
+
+
 
 /*
  * Utility functions
  */
+
+function Err(message, metrics) {
+    this.message = message;
+    this.metrics = metrics;
+}
+Err.prototype = Object.create(Error.prototype);
+Err.prototype.constructor = Err;
 
 // NOTE: there are a few libraries that do this
 function merge() {
@@ -111,191 +424,29 @@
 function failOnTimeout(promise, time) {
     return time <= 0 ? promise :
         Promise.race([promise, delay(time).then(function () {
-            throw 'Operation timed out';
+            throw 'timeout'; // we later compare on this value
         })]);
 }
 
 /**
- * Parse and validate request parameters
+ * When enabled, logs metrics functions calls
+ * @param obj
+ * @returns {{increment: *, endTiming: *}}
  */
-function validateRequest(state) {
-
-    var p = state.request.params,
-        server = p.server,
-        title = p.title,
-        revid = p.revid,
-        id = p.id;
-
-    state.apiRequest = {
-        format: 'json',
-        action: 'query',
-        prop: 'pageprops',
-        ppprop: 'graph_specs',
-        continue: ''
-    };
-
-    if (revid) {
-        if (!/^[0-9]+$/.test(revid)) {
-            // must be a non-negative integer
-            throw 'bad revid param';
-        }
-        revid = parseInt(revid);
-    }
-
-    if (revid) {
-        state.apiRequest.revids = revid;
-    } else if (title) {
-        if (title.indexOf('|') > -1) {
-            throw 'bad title param';
-        }
-        state.apiRequest.titles = title;
-    } else {
-        throw 'no revid or title given';
-    }
-
-    if (!/^[0-9a-f]+$/.test(id)) {
-        throw 'bad id param';
-    }
-    state.graphId = id;
-
-    // Remove optional part #2 from host (makes m. links appear as desktop to 
optimize cache)
-    // 1  2 3
-    // en.m.wikipedia.org
-    var srvParts = serverRe.exec(server);
-    if (!srvParts) {
-        throw 'bad server param';
-    }
-    server = (srvParts[1] || '') + srvParts[3];
-
-    state.server = domainMap[server] || server;
-    state.apiUrl = defaultProtocol + '//' + server + '/w/api.php';
-
-    return state;
-}
-
-/**
- * Retrieve graph specifications from the server
- * @param state is the object with the current state of the request processing
- */
-function getSpec(state) {
-
-    var callApiInt;
-
-    var processResult = function (apiRes) {
-        if (apiRes.status !== 200) {
-            throw 'API result error code ' + apiRes.status;
-        }
-        var res = apiRes.body;
-        if (res.hasOwnProperty('error')) {
-            throw 'API result error: ' + JSON.stringify(res.error);
-        }
-
-        if (res.hasOwnProperty('warnings')) {
-            console.error('API warning: ' + JSON.stringify(res.warnings) +
-            ' from ' + state.server + JSON.stringify(state.apiRequest));
-        }
-        if (res.hasOwnProperty('query') && res.query.hasOwnProperty('pages')) {
-            var pages = res.query.pages,
-                graphData = null;
-
-            Object.getOwnPropertyNames(pages).some(function (k) {
-                var page = pages[k];
-                if (page.hasOwnProperty('pageprops') && 
page.pageprops.hasOwnProperty('graph_specs')) {
-                    var gs = JSON.parse(page.pageprops.graph_specs);
-                    if (gs.hasOwnProperty(state.graphId)) {
-                        graphData = gs[state.graphId];
-                        return true;
-                    }
-                }
-                return false;
-            });
-
-            if (graphData) {
-                state.graphData = graphData;
-                return state;
-            }
-        }
-        if (res.hasOwnProperty('continue')) {
-            callApiInt(state.apiUrl, merge(state.apiRequest, res.continue));
-        }
-        throw 'Unable to find graph_specs with the given id';
-    };
-
-    callApiInt = function(url, req) {
-        var reqOpts = {
-            uri: url,
-            query: req,
-            headers: {
-                'User-Agent': 'graph.ext backend (yurik at wikimedia)'
-            }
+function wrapMetrics(obj) {
+    function logWrap(name){
+        return function(){
+            console.log(name + JSON.stringify([].slice.call(arguments)));
+            return obj[name].apply(obj, arguments);
         };
-        return preq(reqOpts)
-            .then(processResult)
-            .catch(function (reason) {
-                delete reqOpts.headers;
-                console.error('API call failed: ' + state.server + 
JSON.stringify(state.apiRequest));
-                throw reason; // re-throw
-            });
-    };
-
-    return callApiInt(state.apiUrl, state.apiRequest);
-}
-
-function renderOnCanvas(state) {
-    return new Promise(function (fulfill, reject){
-        if (!vega) {
-            throw 'Unable to load Vega npm module';
-        }
-
-        // In case of non-absolute URLs, use requesting server as "local"
-        vega.config.baseURL = defaultProtocol + '//' + state.server;
-
-        vega.headless.render({spec: state.graphData, renderer: 'canvas'}, 
function (err, result) {
-            if (err) {
-                reject(err);
-            } else {
-                var stream = result.canvas.pngStream();
-                state.response.status(200).type('png');
-                stream.on('data', function (chunk) {
-                    state.response.write(chunk);
-                });
-                stream.on('end', function () {
-                    state.response.end();
-                    fulfill(state);
-                });
+    }
+    var result = {};
+    for (var id in obj) {
+        try {
+            if (typeof(obj[id]) == "function") {
+                result[id] = logWrap(id);
             }
-        });
-    });
+        } catch (err) {}
+    }
+    return result;
 }
-
-/**
- * Main entry point for graphoid
- */
-router.get('/:server/:title/:revid/:id.png', function(req, res) {
-
-    var render = Promise
-        .resolve({request: req, response: res})
-        .then(validateRequest)
-        .then(getSpec)
-        .then(renderOnCanvas);
-
-    failOnTimeout(render, timeout)
-        .catch(function (reason) {
-            console.error('Failed ' + JSON.stringify(req.params) + ' ' + 
reason);
-            if (reason.hasOwnProperty('stack')) {
-                console.error(reason.stack);
-            }
-            res.status(400).json(reason);
-        });
-});
-
-
-module.exports = function(app) {
-
-    init(app.conf);
-
-    return {
-        path: '/v1',
-        router: router
-    };
-};

-- 
To view, visit https://gerrit.wikimedia.org/r/192273
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I34cfbe6ac7b6ea57cc346f280ef4d2b5bf3c4457
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/graphoid
Gerrit-Branch: master
Gerrit-Owner: Yurik <yu...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to