FYI, I've finally managed to make it work by using the low-level API to 
start the process (I didn't change the flapping configuration):

# determine the state on startup
w.transition(:init, { true => :up, false => :start }) do |on|
  on.condition(:process_running) do |c|
    c.running = true
  end
end

# determine when process has finished starting
w.transition([:start, :restart], :up) do |on|
  on.condition(:process_running) do |c|
    c.running = true
  end

  # failsafe
  on.condition(:tries) do |c|
    c.times = 3
    c.transition = :start
  end
end

# start if process is not running
w.transition(:up, :start) do |on|
  on.condition(:process_running) do |c|
    c.running = false
    #c.notify = 'developers'
  end
end

This way it detects the flapping correctly. I still don't understand 
why it behaves differently from the earlier high-level version:

w.start_if do |start|
  start.condition(:process_running) do |c|
    c.interval = 15.seconds
    c.running = false
    #c.notify = 'developers'
  end
end

Reading the repo's documentation, it seems to me that flapping detection 
should work with either version. Either I'm missing something or it looks 
like a bug.

On Sunday, May 12, 2013 6:23:43 PM UTC+2, Simon Oulevay wrote:
>
> Hi. I'm trying to configure God's flapping condition to avoid infinite 
> restarts when a problem happens, but I can't seem to make it work. It's 
> never detecting the flapping state.
>
> That's my God configuration:
>
> God.watch do |w|
>
>   w.name = "myapp-unicorn"
>   w.group = "myapp"
>   w.dir = RAILS_ROOT
>
>   w.env = { 'RAILS_ENV' => environment }
>
>   w.start = "bundle exec unicorn -c config/unicorn.rb -E #{environment} -D"
>   w.stop = "kill -s QUIT `cat #{PID_FILE}`"
>   w.restart = "kill -s USR2 `cat #{PID_FILE}`"
>
>   w.pid_file = PID_FILE
>   w.behavior :clean_pid_file
>
>   w.interval = 30.seconds
>
>   w.start_if do |start|
>     start.condition(:process_running) do |c|
>       c.interval = 15.seconds
>       c.running = false
>       #c.notify = 'developers'
>     end
>   end
>
>   w.restart_if do |restart|
>     restart.condition(:memory_usage) do |c|
>       c.above = 150.megabytes
>       c.times = [3, 5] # 3 out of 5 intervals
>       #c.notify = 'developers'
>     end
>
>     restart.condition(:cpu_usage) do |c|
>       c.above = 50.percent
>       c.times = 5
>       #c.notify = 'developers'
>     end
>   end
>
>   w.lifecycle do |on|
>     on.condition(:flapping) do |c|
>       c.to_state = [:start, :restart]
>       c.times = 5
>       c.within = 5.minute
>       c.transition = :unmonitored
>       c.retry_in = 10.minutes
>       c.retry_times = 5
>       c.retry_within = 2.hours
>       #c.notify = 'developers'
>     end
>   end
> end
>
>
> So I deliberately changed "bundle exec" to "budle exec" (a typo) in the start 
> configuration above to cause an error. This is what happens when I start god:
>
> $ RAILS_ENV=staging bundle exec god -c config/god.rb -D
> I [2013-05-07 12:16:19]  INFO: Loading config/god.rb
> I [2013-05-07 12:16:19]  INFO: Syslog enabled.
> I [2013-05-07 12:16:19]  INFO: Using pid file directory: 
> /home/myapp/.god/pids
> I [2013-05-07 12:16:19]  INFO: Started on drbunix:///tmp/god.17165.sock
> I [2013-05-07 12:16:19]  INFO: myapp-unicorn move 'unmonitored' to 'up'
> I [2013-05-07 12:16:19]  INFO: myapp-unicorn moved 'unmonitored' to 'up'
> I [2013-05-07 12:16:19]  INFO: myapp-unicorn [trigger] process is not 
> running (ProcessRunning)
> I [2013-05-07 12:16:19]  INFO: myapp-unicorn move 'up' to 'start'
> I [2013-05-07 12:16:19]  INFO: myapp-unicorn before_start: no pid file to 
> delete (CleanPidFile)
> I [2013-05-07 12:16:19]  INFO: myapp-unicorn start: budle exec unicorn -c 
> config/unicorn.rb -E staging -D
> W [2013-05-07 12:16:19]  WARN: myapp-unicorn start command exited with 
> non-zero code = 1
> I [2013-05-07 12:16:19]  INFO: myapp-unicorn moved 'up' to 'up'
> I [2013-05-07 12:16:19]  INFO: myapp-unicorn [trigger] process is not 
> running (ProcessRunning)
> I [2013-05-07 12:16:19]  INFO: myapp-unicorn move 'up' to 'start'
> I [2013-05-07 12:16:19]  INFO: myapp-unicorn before_start: no pid file to 
> delete (CleanPidFile)
> I [2013-05-07 12:16:19]  INFO: myapp-unicorn start: budle exec unicorn -c 
> config/unicorn.rb -E staging -D
> W [2013-05-07 12:16:19]  WARN: myapp-unicorn start command exited with 
> non-zero code = 1
> I [2013-05-07 12:16:19]  INFO: myapp-unicorn moved 'up' to 'up'
>
> ...
>
>
> And it keeps doing that, trying to restart several times per second. I 
> stopped it after a few seconds (it tried at least 50 times). I expected the 
> flapping condition to stop it after 5 tries. What am I misunderstanding or 
> doing wrong?
>

-- 
You received this message because you are subscribed to the Google Groups 
"god.rb" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To post to this group, send email to [email protected].
Visit this group at http://groups.google.com/group/god-rb?hl=en.
For more options, visit https://groups.google.com/groups/opt_out.


Reply via email to