Forum: CFEngine Help Subject: Re: Aborting CFEngine agent if there is an earlier instance of it already running Author: sauer Link to topic: https://cfengine.com/forum/read.php?3,27239,27267#msg-27267
I had no luck with expireafter. As I recall, there was some discussion earlier about adding a parameter in the executor to define a maximum execution time, but I don't know where that went. My current solution (which is more than you asked for, but you can probably figure it out) looks like this:

########################################
# verify running processes
#
# Bundle cfengine_check does four things:
#   1. kills processes whose resident size exceeds a per-process limit,
#   2. starts or stops the CFEngine daemons according to the RUN_* switches
#      found in the defaults file $(update.cfengine_default_file),
#   3. reaps agent processes that are too old or orphaned,
#   4. kills every agent process when too many of them are running.
#
# External names used: $(update.cfengine_default_file),
# $(update.cfengine_init_script), the class srv_any and the classes body
# if_ok() are not defined here and must be provided elsewhere.
bundle agent cfengine_check
{
  vars:

    any::
      # Process names of the short-lived agents that may need reaping.
      "agent_name"  slist => { "cf-agent", "cf-twin", "cf-promises" };

      # ps returns time in local zone, ago() uses UTC
      "agent_age"   int => ago(0,0,0,7,0,0);  # year, month, day, hour, min, sec
      "now"         int => now();

      # Upper bound handed to cfengine_reaper_count() as the match range.
      "agent_count" int => "6";

    # NOTE(review): "maxsize" is read below as an array
    # ($(maxsize[$(sizeprocs)]) / getindices), but is declared here without
    # an index, so getindices() has nothing to return.  The per-process
    # indices appear to have been lost from the posted text and cannot be
    # inferred from it -- restore them (e.g. "maxsize[<process name>]")
    # before use.
    any::
      "maxsize" int => "12000";  # 12,000 KB

    srv_any::
      "maxsize" int => "30000";  # 30,000 KB on servers

    !srv_any::
      "maxsize" int => "7000";   # 7,000 KB on clients

    any::
      "sizeprocs" slist => getindices( "maxsize" );

    any::
      # Daemon name => switch in the defaults file that enables it.
      # The posted text declared "default_keys" four times without an index,
      # which cannot work with getindices() and $(default_keys[...]) below.
      # NOTE(review): the daemon names used as indices are reconstructed
      # from the RUN_CF_* key names -- confirm against the real policy.
      "default_keys[cf-serverd]"  string => "RUN_CF_SERVERD";
      "default_keys[cf-execd]"    string => "RUN_CF_EXECD";
      "default_keys[cf-monitord]" string => "RUN_CF_MONITORD";
      "default_keys[cf-hub]"      string => "RUN_CF_HUB";

      "cfengine_procs" slist => getindices( "default_keys" );

      # Class-safe spelling of each daemon name.
      "can_proc[$(cfengine_procs)]" string => canonify("$(cfengine_procs)");

  classes:

      # start_<daemon> when the defaults file contains <KEY>=1,
      # kill_<daemon> when it does not.
      "start_$(can_proc[$(cfengine_procs)])"
        expression => regline( "$(default_keys[$(cfengine_procs)])=1",
                               "$(update.cfengine_default_file)" );

      "kill_$(can_proc[$(cfengine_procs)])"
        not => regline( "$(default_keys[$(cfengine_procs)])=1",
                        "$(update.cfengine_default_file)" );

  processes:

      "$(sizeprocs)"
        process_select => rsize_exceeds("$(maxsize[$(sizeprocs)])"),
        signals        => { "term", "kill" },
        comment        => "Kill $(sizeprocs) if RSS > $(maxsize[$(sizeprocs)])";

      "$(cfengine_procs)"
        ifvarclass    => "start_$(can_proc[$(cfengine_procs)])",
        restart_class => "restart_$(can_proc[$(cfengine_procs)])",
        comment       => "restart cfengine if $(cfengine_procs) is dead";

      "$(cfengine_procs)"
        ifvarclass => "kill_$(can_proc[$(cfengine_procs)])",
        signals    => { "term", "kill" },
        comment    => "kill $(cfengine_procs) if it's running";

      "$(agent_name)"
        process_select => cfengine_reaper_select("$(agent_age)"),
        signals        => { "kill" },
        comment        => "clean up old $(agent_name) processes";

      "$(agent_name)"
        process_count => cfengine_reaper_count("$(agent_name)", "$(agent_count)"),
        comment       => "count $(agent_name) processes";

    cfengine_gone_nuts::

      "$(agent_name)"
        # cf-agent won't kill itself, so just select everything
        process_select => cfengine_reaper_select("$(now)"),
        signals        => { "kill" },
        comment        => "clean up all $(agent_name) processes, it's gone nuts";

  commands:

    !restarted_cfengine_procs::

      "$(update.cfengine_init_script) restart"
        ifvarclass => "restart_$(can_proc[$(cfengine_procs)])",
        classes    => if_ok("restarted_cfengine_procs");

  reports:

    restarted_cfengine_procs::

      "Restarted cfengine procs ($(cfengine_procs) was down)"
        ifvarclass => "restart_$(can_proc[$(cfengine_procs)])";

    !restarted_cfengine_procs::

      "failed restarting cfengine procs ($(cfengine_procs) down)"
        ifvarclass => "restart_$(can_proc[$(cfengine_procs)])";

    cfengine_gone_nuts::

      # The guard used to be "$(can_proc[$(agent_name)])_gone_nuts", but
      # can_proc is only filled in for the daemons in cfengine_procs, never
      # for agent names, so that variable never expanded.  Canonify the
      # class name set by cfengine_reaper_count() directly instead.
      "$(agent_name) had gone nuts; attempted return to sanity."
        ifvarclass => canonify("$(agent_name)_gone_nuts");
}

# grab all root processes in time range or who are orphaned
#   t - upper bound of the start-time range (lower bound is 0)
body process_select cfengine_reaper_select(t)
{
  process_owner  => { "root" };
  stime_range    => irange(0,"$(t)");  # started between the epoch and $t ago
  ppid           => irange(0,1);       # parent is 0 or 1
  # owned by root AND (start time in range OR parent pid in range)
  process_result => "process_owner&(stime|ppid)";
}

# see if we have too many procs running
#   p - process name, used to build the class "<p>_gone_nuts"
#   c - highest acceptable number of matching processes
body process_count cfengine_reaper_count(p,c)
{
  match_range         => "0,$(c)";
  out_of_range_define => { "cfengine_gone_nuts", "$(p)_gone_nuts" };
}

# find processes with a resident set size > $(limit)
#   limit - lower bound of the rsize range
body process_select rsize_exceeds(limit)
{
  # hpux seemingly lacks the rsize attribute, so this doesn't work there
  rsize          => irange("$(limit)","inf");
  # select on the rsize criterion alone
  process_result => "rsize";
}

I have the cfengine defaults file (/etc/defaults/cfengine, or whatever) defined in a bundle named update. 
I also use 7 hours for the age, because my machines are between UTC-0400 and UTC-0600, so things which are actually only an hour old show up as being 7 hours old in the farthest-back timezone. I submitted a bug for that; I should probably check on that status and see if I still need to be working around that behavior. :) _______________________________________________ Help-cfengine mailing list Help-cfengine@cfengine.org https://cfengine.org/mailman/listinfo/help-cfengine