Index: server/mpm_common.c =================================================================== RCS file: /home/cvs/httpd-2.0/server/mpm_common.c,v retrieving revision 1.121 diff -u -r1.121 mpm_common.c --- server/mpm_common.c 14 Aug 2004 10:49:43 -0000 1.121 +++ server/mpm_common.c 15 Sep 2004 17:40:30 -0000 @@ -61,22 +61,55 @@ #ifdef AP_MPM_WANT_RECLAIM_CHILD_PROCESSES void ap_reclaim_child_processes(int terminate) { - int i; - long int waittime = 1024 * 16; /* in usecs */ + apr_time_t waittime = 1024 * 16; apr_status_t waitret; - int tries; + int i; int not_dead_yet; int max_daemons; + apr_time_t starttime = apr_time_now(); + /* this table of actions and elapsed times tells what action is taken + * at which elapsed time from starting the reclaim + */ + struct { + enum {DO_NOTHING, SEND_SIGTERM, SEND_SIGKILL, GIVEUP} action; + apr_time_t action_time; + } action_table[] = { + {DO_NOTHING, 0}, /* dummy entry for iterations where we reap + * children but take no action against + * stragglers + */ + {SEND_SIGTERM, apr_time_from_sec(3)}, + {SEND_SIGTERM, apr_time_from_sec(4)}, + {SEND_SIGTERM, apr_time_from_sec(5)}, + {SEND_SIGKILL, apr_time_from_sec(7)}, + {GIVEUP, apr_time_from_sec(9)} + }; + int cur_action; /* index of action we decided to take this + * iteration + */ + int next_action = 1; /* index of first real action */ ap_mpm_query(AP_MPMQ_MAX_DAEMON_USED, &max_daemons); - for (tries = terminate ? 4 : 1; tries <= 9; ++tries) { - /* don't want to hold up progress any more than - * necessary, but we need to allow children a few moments to exit. - * Set delay with an exponential backoff. - */ + while (1) { apr_sleep(waittime); + /* don't let waittime get longer than 1 second; otherwise, we don't + * react quickly to the last child exiting, and taking action can + * be delayed + */ waittime = waittime * 4; + if (waittime > apr_time_from_sec(1)) { + waittime = apr_time_from_sec(1); + } + + /* see what action to take, if any */ + if (action_table[next_action].action_time <= apr_time_now() - starttime) { + cur_action = next_action; + ++next_action; + } + else { + cur_action = 0; /* nothing to do */ + } /* now see who is done */ not_dead_yet = 0; @@ -95,16 +128,11 @@ } ++not_dead_yet; - switch (tries) { - case 1: /* 16ms */ - case 2: /* 82ms */ - case 3: /* 344ms */ - case 4: /* 16ms */ + switch(action_table[cur_action].action) { + case DO_NOTHING: break; - - case 5: /* 82ms */ - case 6: /* 344ms */ - case 7: /* 1.4sec */ + + case SEND_SIGTERM: /* ok, now it's being annoying */ ap_log_error(APLOG_MARK, APLOG_WARNING, 0, ap_server_conf, @@ -113,9 +141,8 @@ (long)pid); kill(pid, SIGTERM); break; - - case 8: /* 6 sec */ - /* die child scum */ + + case SEND_SIGKILL: ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, "child process %ld still did not exit, " @@ -133,8 +160,8 @@ kill_thread(pid); #endif break; - - case 9: /* 14 sec */ + + case GIVEUP: /* gave it our best shot, but alas... If this really * is a child we are trying to kill and it really hasn't * exited, we will likely fail to bind to the port @@ -153,8 +180,9 @@ apr_proc_other_child_refresh_all(APR_OC_REASON_RESTART); #endif - if (!not_dead_yet) { - /* nothing left to wait for */ + if (!not_dead_yet || + action_table[cur_action].action == GIVEUP) { + /* nothing left to wait for, or we gave up */ break; } }