Forum: CFEngine Help
Subject: Re: Aborting CFEngine agent if there is an earlier instance of it 
already running
Author: sauer
Link to topic: https://cfengine.com/forum/read.php?3,27239,27267#msg-27267

I had no luck with expireafter.  As I recall, there was some discussion earlier 
about adding a parameter in the executor to define a maximum execution time, 
but I don't know where that went.

My current solution (which is more than you asked for, but you can probably 
figure it out) looks like this:


########################################
# verify running processes
#
# cfengine_check:
#   * kills CFEngine daemons whose resident size exceeds a per-daemon limit,
#   * makes sure each daemon is running (or not) according to its RUN_CF_*
#     switch in the defaults file named by $(update.cfengine_default_file),
#   * kills agent processes selected by cfengine_reaper_select (root-owned and
#     either started before $(agent_age) or parented by pid 0/1),
#   * if more than $(agent_count) instances of an agent are found, defines
#     cfengine_gone_nuts and kills every matching root-owned agent process.
#
# Depends on variables cfengine_default_file and cfengine_init_script from a
# bundle named "update", and on an if_ok classes body defined elsewhere.
bundle agent cfengine_check {
vars:
  any::
    "agent_name"  slist => { "cf-agent", "cf-twin", "cf-promises" };
    # class-name-safe form of each agent name (e.g. cf-agent -> cf_agent),
    # used to build and test the per-agent "<name>_gone_nuts" classes
    "can_agent[$(agent_name)]" string => canonify("$(agent_name)");
    # ps returns time in local zone, ago() uses UTC
    "agent_age"   int => ago(0,0,0,7,0,0); #year, month, day, hour, min, sec
    "now"         int => now();
    # upper bound handed to cfengine_reaper_count for each agent name
    "agent_count" int => "6";

  # Per-daemon resident-size limits, keyed by process name.
  # NOTE(review): the literal array indices were missing from the posted
  # policy (the variables appeared as plain "maxsize"), which left
  # getindices() with nothing to return.  The indices below are reconstructed
  # from the original column alignment and the server/client split -- confirm
  # they match the daemons you want size-limited.
  any::
    "maxsize[cf-monitord]" int => "12000"; # 12,000 KB
  srv_any::
    "maxsize[cf-serverd]"  int => "30000"; # 30,000 KB on servers
  !srv_any::
    "maxsize[cf-serverd]"  int => "7000";  #  7,000 KB on clients
  any::
    "sizeprocs" slist => getindices( "maxsize" );

  # Daemon name -> switch name looked for (as "<switch>=1") in the defaults
  # file.  The indices were likewise missing from the posted policy and are
  # restored here to match the RUN_CF_* values.
  any::
    "default_keys[cf-serverd]"  string => "RUN_CF_SERVERD";
    "default_keys[cf-execd]"    string => "RUN_CF_EXECD";
    "default_keys[cf-monitord]" string => "RUN_CF_MONITORD";
    "default_keys[cf-hub]"      string => "RUN_CF_HUB";
    "cfengine_procs" slist => getindices( "default_keys" );
    # class-name-safe form of each daemon name
    "can_proc[$(cfengine_procs)]" string => canonify("$(cfengine_procs)");

classes:
  # start_<daemon>: the defaults file contains "<switch>=1"
  "start_$(can_proc[$(cfengine_procs)])" expression =>
    regline( "$(default_keys[$(cfengine_procs)])=1",
             "$(update.cfengine_default_file)"
    );
  # kill_<daemon>: the defaults file does not contain "<switch>=1"
  "kill_$(can_proc[$(cfengine_procs)])" not =>
    regline( "$(default_keys[$(cfengine_procs)])=1",
             "$(update.cfengine_default_file)"
    );

processes:
  "$(sizeprocs)"
    process_select => rsize_exceeds("$(maxsize[$(sizeprocs)])"),
    signals => { "term", "kill" },
    comment => "Kill $(sizeprocs) if RSS > $(maxsize[$(sizeprocs)])";
  "$(cfengine_procs)"
    ifvarclass    => "start_$(can_proc[$(cfengine_procs)])",
    restart_class => "restart_$(can_proc[$(cfengine_procs)])",
    comment => "restart cfengine if $(cfengine_procs) is dead";
  "$(cfengine_procs)"
    ifvarclass    => "kill_$(can_proc[$(cfengine_procs)])",
    signals => { "term", "kill" },
    comment => "kill $(cfengine_procs) if it's running";
  "$(agent_name)"
    process_select => cfengine_reaper_select("$(agent_age)"),
    signals => { "kill" },
    comment => "clean up old $(agent_name) processes";
  # The canonified name is passed so that the class defined by the count body
  # ("<name>_gone_nuts") is the same one the report below tests for.
  "$(agent_name)"
    process_count  => cfengine_reaper_count("$(can_agent[$(agent_name)])", "$(agent_count)"),
    comment => "count $(agent_name) processes";
  cfengine_gone_nuts::
  "$(agent_name)" # cf-agent won't kill itself, so just select everything
    process_select => cfengine_reaper_select("$(now)"),
    signals => { "kill" },
    comment => "clean up all $(agent_name) processes, it's gone nuts";

commands:
  # run the init script's restart only until it has succeeded once
  !restarted_cfengine_procs::
  "$(update.cfengine_init_script) restart"
    ifvarclass => "restart_$(can_proc[$(cfengine_procs)])",
    classes    => if_ok("restarted_cfengine_procs");

reports:
  restarted_cfengine_procs::
    "Restarted cfengine procs ($(cfengine_procs) was down)"
      ifvarclass => "restart_$(can_proc[$(cfengine_procs)])";
  !restarted_cfengine_procs::
    "failed restarting cfengine procs ($(cfengine_procs) down)"
      ifvarclass => "restart_$(can_proc[$(cfengine_procs)])";
  cfengine_gone_nuts::
    # keyed by can_agent (agent names); can_proc only holds daemon names
    "$(agent_name) had gone nuts; attempted return to sanity."
      ifvarclass => "$(can_agent[$(agent_name)])_gone_nuts";
}

# Select processes owned by root that additionally satisfy at least one of:
#   - start time between 0 (the epoch) and the timestamp $(t)
#   - parent pid of 0 or 1
body process_select cfengine_reaper_select(t)
{
  # parent pid is 0 or 1
  ppid           => irange("0","1");
  # started somewhere between the epoch and $(t)
  stime_range    => irange("0","$(t)");
  process_owner  => { "root" };
  # owner test is mandatory; either the start-time or the parent test suffices
  process_result => "process_owner&(stime|ppid)";
}

# Count check for process name $(p): when the number of matches falls outside
# 0..$(c), define both the shared class cfengine_gone_nuts and the
# per-process class $(p)_gone_nuts.
body process_count cfengine_reaper_count(p,c)
{
  out_of_range_define => { "cfengine_gone_nuts", "$(p)_gone_nuts" };
  # acceptable number of matching processes
  match_range         => "0,$(c)";
}

# Select processes whose resident size (rsize) lies between $(limit) and
# infinity.
body process_select rsize_exceeds(limit)
{
  # rsize is the only criterion
  process_result => "rsize";
  # hpux seemingly lacks the rsize attribute, so this doesn't work there
  rsize          => irange("$(limit)","inf");
}


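I have the path to the cfengine defaults file (/etc/defaults/cfengine, or 
whatever) and the path to the init script defined as variables in a bundle 
named update (referenced above as update.cfengine_default_file and 
update.cfengine_init_script).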

I also use 7 hours for the age, because my machines are between UTC-0400 and 
UTC-0600, so things which are actually only an hour old show up as being 7 
hours old in the farthest-back timezone.  I submitted a bug for that; I should 
probably check on that status and see if I still need to be working around that 
behavior. :)

_______________________________________________
Help-cfengine mailing list
Help-cfengine@cfengine.org
https://cfengine.org/mailman/listinfo/help-cfengine

Reply via email to