This patch prevents multiple processes which are starting up from
grabbing the same process slot in the scoreboard.  Other processes may
still share that process slot while they are quiescing (due to
MaxRequestsPerChild, perform_idle_server_maintenance, or restarts).

Since only one process per slot will be starting new threads at any
given time, it eliminates the race conditions where multiple processes
both see what looks like a vacant worker slot simultaneously.

This works for me.  I would appreciate comments, and would love to see
other folks bang on it.

Greg
Index: include/scoreboard.h
===================================================================
RCS file: /cvs/apache/httpd-2.0/include/scoreboard.h,v
retrieving revision 1.28
diff -u -d -b -r1.28 scoreboard.h
--- scoreboard.h        2001/07/26 15:53:15     1.28
+++ scoreboard.h        2001/07/27 22:05:54
@@ -178,6 +178,9 @@
     pid_t pid;
     ap_generation_t generation;        /* generation of this child */
     ap_scoreboard_e sb_type;
+    int quiescing;          /* the process whose pid is stored above is
+                             * going down gracefully
+                             */
 };
 
 typedef struct {
Index: server/mpm/threaded/threaded.c
===================================================================
RCS file: /cvs/apache/httpd-2.0/server/mpm/threaded/threaded.c,v
retrieving revision 1.50
diff -u -d -b -r1.50 threaded.c
--- threaded.c  2001/07/26 18:11:53     1.50
+++ threaded.c  2001/07/27 22:05:55
@@ -650,6 +650,10 @@
         (request_rec *) NULL);
     dying = 1;
     apr_lock_acquire(worker_thread_count_mutex);
+    if (ap_my_pid == ap_scoreboard_image->parent[process_slot].pid) {
+        /* tell the parent that it may use this scoreboard slot */
+        ap_scoreboard_image->parent[process_slot].quiescing = 1;
+    }
     worker_thread_count--;
     if (worker_thread_count == 0) {
         /* All the threads have exited, now finish the shutdown process
@@ -900,6 +904,7 @@
         clean_child_exit(0);
     }
     /* else */
+    ap_scoreboard_image->parent[slot].quiescing = 0;
     ap_scoreboard_image->parent[slot].pid = pid;
     return 0;
 }
@@ -961,6 +966,7 @@
     int i, j;
     int idle_thread_count;
     worker_score *ws;
+    process_score *ps;
     int free_length;
     int free_slots[MAX_SPAWN_RATE];
     int last_non_dead;
@@ -1002,7 +1008,10 @@
                ++idle_thread_count;
            }
        }
-       if (any_dead_threads && free_length < idle_spawn_rate) {
+        ps = &ap_scoreboard_image->parent[i];
+        if (any_dead_threads && free_length < idle_spawn_rate 
+                && (!ps->pid              /* no process in the slot */
+                    || ps->quiescing)) {   /* or at least one is going away */
            free_slots[free_length] = i;
            ++free_length;
        }
@@ -1083,6 +1092,8 @@
                 for (i = 0; i < ap_threads_per_child; i++)
                     ap_update_child_status(child_slot, i, SERVER_DEAD, (request_rec 
*) NULL);
                 
+                ap_scoreboard_image->parent[child_slot].pid = 0;
+                ap_scoreboard_image->parent[child_slot].quiescing = 0;
                if (remaining_children_to_start
                    && child_slot < ap_daemons_limit) {
                    /* we're still doing a 1-for-1 replacement of dead

Reply via email to