We want to know what a lazy worker was doing when it timed out. KILL is un-trappable, so we send TERM first, which lets the worker print its backtrace before exiting. Then, to make sure the process really dies, we send KILL a second later. --- lib/unicorn/http_server.rb | 31 +++++++++++++++++++------------ lib/unicorn/worker.rb | 7 ++++++- 2 files changed, 25 insertions(+), 13 deletions(-)
diff --git a/lib/unicorn/http_server.rb b/lib/unicorn/http_server.rb index 21cb9a1..1b2a33b 100644 --- a/lib/unicorn/http_server.rb +++ b/lib/unicorn/http_server.rb @@ -474,22 +474,29 @@ class Unicorn::HttpServer end # forcibly terminate all workers that haven't checked in in timeout seconds. The timeout is implemented using an unlinked File + # Workers will be killed with TERM first, so the backtrace can be printed, on + # the next call to murder_lazy_workers the worker will receive KILL. def murder_lazy_workers next_sleep = @timeout - 1 now = Time.now.to_i WORKERS.dup.each_pair do |wpid, worker| - tick = worker.tick - 0 == tick and next # skip workers that haven't processed any clients - diff = now - tick - tmp = @timeout - diff - if tmp >= 0 - next_sleep > tmp and next_sleep = tmp - next + if worker.needs_sig_kill? + kill_worker(:KILL, wpid) + else + tick = worker.tick + 0 == tick and next # skip workers that haven't processed any clients + diff = now - tick + tmp = @timeout - diff + if tmp >= 0 + next_sleep > tmp and next_sleep = tmp + next + end + next_sleep = 0 + logger.error "worker=#{worker.nr} PID:#{wpid} timeout " \ + "(#{diff}s > #{@timeout}s), killing" + kill_worker(:TERM, wpid) + worker.needs_sig_kill = true end - next_sleep = 0 - logger.error "worker=#{worker.nr} PID:#{wpid} timeout " \ - "(#{diff}s > #{@timeout}s), killing" - kill_worker(:KILL, wpid) # take no prisoners for timeout violations end next_sleep <= 0 ? 
1 : next_sleep end @@ -606,7 +613,7 @@ class Unicorn::HttpServer def init_worker_process(worker) worker.atfork_child # we'll re-trap :QUIT later for graceful shutdown iff we accept clients - EXIT_SIGS.each { |sig| trap(sig) { exit!(0) } } + EXIT_SIGS.each { |sig| trap(sig) { STDERR.puts(caller); exit!(0) } } exit!(0) if (SIG_QUEUE & EXIT_SIGS)[0] WORKER_QUEUE_SIGS.each { |sig| trap(sig, nil) } trap(:CHLD, 'DEFAULT') diff --git a/lib/unicorn/worker.rb b/lib/unicorn/worker.rb index e74a1c9..4214500 100644 --- a/lib/unicorn/worker.rb +++ b/lib/unicorn/worker.rb @@ -11,7 +11,7 @@ require "raindrops" class Unicorn::Worker # :stopdoc: attr_accessor :nr, :switched - attr_writer :tmp + attr_writer :tmp, :needs_sig_kill attr_reader :to_io # IO.select-compatible PER_DROP = Raindrops::PAGE_SIZE / Raindrops::SIZE @@ -25,6 +25,7 @@ class Unicorn::Worker @nr = nr @tmp = @switched = false @to_io, @master = Unicorn.pipe + @needs_sig_kill = false end def atfork_child # :nodoc: @@ -149,4 +150,8 @@ class Unicorn::Worker Process.euid != uid and Process::UID.change_privilege(uid) @switched = true end + + def needs_sig_kill? + @needs_sig_kill + end end -- 1.9.2 _______________________________________________ Unicorn mailing list - mongrel-unicorn@rubyforge.org http://rubyforge.org/mailman/listinfo/mongrel-unicorn Do not quote signatures (like this one) or top post when replying