Arlolra has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/268848

Change subject: Follow up to 4d06e1ba
......................................................................

Follow up to 4d06e1ba

 * @inez noticed that ctlr-c sends SIGINT to all processes in the group,
   causing workers to race to disconnect.  This could put a worker in
   the "disconnected" state, making `worker.disconncet()` throw and
   preventing the timeout from being cleared.  When it fires, we get a
   "kill ESRCH".

 * Cluster in node v0.8 doesn't have a `worker.kill()` method, so we use
   the internal worker.process.kill instead.  This is what node does
   anyways and is safer than the `process.kill()` in the timeout, so we
   use it for both.

 * Finally, we prevent kill workers from starting back up on exit.
   That's something that was missed in the service-runner port.

Change-Id: I61a8f034cde7d5ef97e3064683197fdc4e98e00a
---
M bin/server.js
1 file changed, 15 insertions(+), 7 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid 
refs/changes/48/268848/1

diff --git a/bin/server.js b/bin/server.js
index 2da9a61..a30d045 100755
--- a/bin/server.js
+++ b/bin/server.js
@@ -123,25 +123,29 @@
        });
 };
 
-var stopWorker = function(workerId) {
+var stopWorker = Promise.method(function(workerId) {
        var worker = cluster.workers[workerId];
+       // ctrl-c sends SIGINT to all the workers, so make sure we haven't lost
+       // the race.  In the disconnected state, `worker.disconnect()` throws
+       // an error, "channel closed".
+       if (worker.state === 'disconnected') { return; }
        var p = new Promise(function(resolve) {
                var timeout = setTimeout(function() {
                        // 
https://nodejs.org/api/cluster.html#cluster_worker_kill_signal_sigterm
                        // `worker.kill()` wants `worker.state === 
'disconnected'`.
                        // If that doesn't happen shortly, escalate!
-                       process.kill(worker.process.pid, 'SIGKILL');
+                       worker.process.kill('SIGKILL');
                        resolve();
                }, 10 * 1000);
-               worker.on('disconnect', function() {
-                       worker.kill('SIGKILL');
+               worker.once('disconnect', function() {
                        clearTimeout(timeout);
+                       worker.process.kill('SIGKILL');
                        resolve();
                });
        });
        worker.disconnect();
        return p;
-};
+});
 
 if (cluster.isMaster && argv.n > 0) {
        // Master
@@ -187,8 +191,10 @@
                spawn();
        }
 
+       var shuttingDown = false;
+
        cluster.on('exit', function(worker, code, signal) {
-               if (!worker.suicide) {
+               if (!worker.suicide && !shuttingDown) {
                        var pid = worker.process.pid;
                        processLogger.log("warning", util.format("worker %s 
died (%s), restarting.", pid, signal || code));
                        if (timer) { timer.count('worker.exit.' + (signal || 
code), ''); }
@@ -197,11 +203,12 @@
        });
 
        var shutdownMaster = function() {
+               shuttingDown = true;
                processLogger.log('info', 'shutting down, killing workers');
                Promise.map(Object.keys(cluster.workers), 
stopWorker).then(function() {
                        processLogger.log('info', 'exiting');
                        process.exit(0);
-               });
+               }).done();
        };
 
        process.on('SIGINT', shutdownMaster);
@@ -215,6 +222,7 @@
                process.exit(0);
        };
 
+       process.on('SIGINT', shutdownWorker);
        process.on('SIGTERM', shutdownWorker);
        process.on('disconnect', shutdownWorker);
 

-- 
To view, visit https://gerrit.wikimedia.org/r/268848
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I61a8f034cde7d5ef97e3064683197fdc4e98e00a
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to