fielding    97/10/06 08:05:34

  Modified:    src/main http_main.c
  Log:
  Changes in revision 1.218 caused reclaim_child_processes() to be too
  quick in checking for child exit and sending additional SIGTERM and
  SIGKILL interrupts.  Not a problem for systems with fast process context
  switching, but Solaris complains bitterly.  The fix is to insert a few
  quiet checks of the child status so that the exponential backoff on the
  wait time becomes significant before sending additional interrupts and
  complaining to the error log.
  
  Reviewed by:  Dean Gaudet
  
  Revision  Changes    Path
  1.233     +23 -16    apachen/src/main/http_main.c
  
  Index: http_main.c
  ===================================================================
  RCS file: /export/home/cvs/apachen/src/main/http_main.c,v
  retrieving revision 1.232
  retrieving revision 1.233
  diff -u -r1.232 -r1.233
  --- http_main.c       1997/10/05 08:12:45     1.232
  +++ http_main.c       1997/10/06 15:05:32     1.233
  @@ -1710,11 +1710,11 @@
       return -1;
   }
   
  -static void reclaim_child_processes(int start_tries)
  +static void reclaim_child_processes(int terminate)
   {
   #ifndef MULTITHREAD
       int i, status;
  -    long int waittime = 4096;        /* in usecs */
  +    long int waittime = 1024 * 16;   /* in usecs */
       struct timeval tv;
       int waitret, tries;
       int not_dead_yet;
  @@ -1724,17 +1724,14 @@
   
       sync_scoreboard_image();
   
  -    tries = 0;
  -    for (tries = start_tries; tries < 4; ++tries) {
  +    for (tries = terminate ? 4 : 1; tries <= 9; ++tries) {
        /* don't want to hold up progress any more than 
         * necessary, but we need to allow children a few moments to exit.
  -      * delay with an exponential backoff.
  -      * Currently set for a maximum wait of a bit over
  -      * four seconds.
  +      * Set delay with an exponential backoff.
         */
        tv.tv_sec = waittime / 1000000;
        tv.tv_usec = waittime % 1000000;
  -     waittime = waittime * 2;
  +     waittime = waittime * 4;
        ap_select(0, NULL, NULL, NULL, &tv);
   
        /* now see who is done */
  @@ -1752,28 +1749,38 @@
            }
            ++not_dead_yet;
            switch (tries) {
  -         case 1:
  +         case 1:     /*  16ms */
  +         case 2:     /*  82ms */
  +             break;
  +         case 3:     /* 344ms */
                /* perhaps it missed the SIGHUP, lets try again */
  -             aplog_error(APLOG_MARK, APLOG_NOERRNO|APLOG_ERR, server_conf,
  +             aplog_error(APLOG_MARK, APLOG_NOERRNO|APLOG_WARNING,
  +                         server_conf,
                    "child process %d did not exit, sending another SIGHUP",
                            pid);
                kill(pid, SIGHUP);
  +             waittime = 1024 * 16;
                break;
  -         case 2:
  +         case 4:     /*  16ms */
  +         case 5:     /*  82ms */
  +         case 6:     /* 344ms */
  +             break;
  +         case 7:     /* 1.4sec */
                /* ok, now it's being annoying */
  -             aplog_error(APLOG_MARK, APLOG_NOERRNO|APLOG_ERR, server_conf,
  +             aplog_error(APLOG_MARK, APLOG_NOERRNO|APLOG_WARNING,
  +                         server_conf,
                   "child process %d still did not exit, sending a SIGTERM",
                            pid);
                kill(pid, SIGTERM);
                break;
  -         case 3:
  +         case 8:     /*  6 sec */
                /* die child scum */
                aplog_error(APLOG_MARK, APLOG_NOERRNO|APLOG_ERR, server_conf,
                   "child process %d still did not exit, sending a SIGKILL",
                            pid);
                kill(pid, SIGKILL);
                break;
  -         case 4:
  +         case 9:     /* 14 sec */
                /* gave it our best shot, but alas...  If this really 
                 * is a child we are trying to kill and it really hasn't
                 * exited, we will likely fail to bind to the port
  @@ -3497,7 +3504,7 @@
            if (ap_killpg(pgrp, SIGTERM) < 0) {
                aplog_error(APLOG_MARK, APLOG_WARNING, server_conf, "killpg 
SIGTERM");
            }
  -         reclaim_child_processes(2);         /* Start with SIGTERM */
  +         reclaim_child_processes(1);         /* Start with SIGTERM */
            aplog_error(APLOG_MARK, APLOG_NOERRNO|APLOG_NOTICE, server_conf,
                        "httpd: caught SIGTERM, shutting down");
   
  @@ -3554,7 +3561,7 @@
            if (ap_killpg(pgrp, SIGHUP) < 0) {
                aplog_error(APLOG_MARK, APLOG_WARNING, server_conf, "killpg 
SIGHUP");
            }
  -         reclaim_child_processes(1);         /* Not when just starting up */
  +         reclaim_child_processes(0);         /* Not when just starting up */
            aplog_error(APLOG_MARK, APLOG_NOERRNO|APLOG_NOTICE, server_conf,
                        "SIGHUP received.  Attempting to restart");
        }
  
  
  

Reply via email to