This patch addresses a problem that Ian found this morning:
it's possible for the current worker code (from 2.0.36 onward)
to get stuck in a state where all the worker threads are waiting
in ap_queue_pop() but the queue_info object thinks there are no
idle threads. With this patch, the queue and queue_info objects
are combined, and the increment/decrement of the idle worker
count is done around the condition variable wait in ap_queue_pop().
Ian and I will be doing stress testing of the patch this afternoon,
but I'm posting the patch now in case anyone else wants to try it
in the meantime.
--Brian
Index: server/mpm/worker/fdqueue.h
===================================================================
RCS file: /home/cvs/httpd-2.0/server/mpm/worker/fdqueue.h,v
retrieving revision 1.19
diff -u -r1.19 fdqueue.h
--- server/mpm/worker/fdqueue.h 28 Apr 2002 23:12:35 -0000 1.19
+++ server/mpm/worker/fdqueue.h 21 May 2002 18:51:42 -0000
@@ -71,16 +71,6 @@
#endif
#include <apr_errno.h>
-typedef struct fd_queue_info_t fd_queue_info_t;
-
-apr_status_t ap_queue_info_create(fd_queue_info_t **queue_info,
- apr_pool_t *pool, int max_idlers);
-apr_status_t ap_queue_info_set_idle(fd_queue_info_t *queue_info,
- apr_pool_t *pool_to_recycle);
-apr_status_t ap_queue_info_wait_for_idler(fd_queue_info_t *queue_info,
- apr_pool_t **recycled_pool);
-apr_status_t ap_queue_info_term(fd_queue_info_t *queue_info);
-
struct fd_queue_elem_t {
apr_socket_t *sd;
apr_pool_t *p;
@@ -94,13 +84,19 @@
apr_thread_mutex_t *one_big_mutex;
apr_thread_cond_t *not_empty;
int terminated;
+ int idlers;
+ apr_thread_cond_t *idlers_available;
+ apr_pool_t **recycled_pools;
+ int num_recycled;
};
typedef struct fd_queue_t fd_queue_t;
apr_status_t ap_queue_init(fd_queue_t *queue, int queue_capacity, apr_pool_t *a);
apr_status_t ap_queue_push(fd_queue_t *queue, apr_socket_t *sd, apr_pool_t *p);
-apr_status_t ap_queue_pop(fd_queue_t *queue, apr_socket_t **sd, apr_pool_t **p);
+apr_status_t ap_queue_pop(fd_queue_t *queue, apr_socket_t **sd, apr_pool_t **p,
+ apr_pool_t **recycled);
apr_status_t ap_queue_interrupt_all(fd_queue_t *queue);
+apr_status_t ap_queue_wait_for_idler(fd_queue_t *queue, apr_pool_t **recycled);
apr_status_t ap_queue_term(fd_queue_t *queue);
#endif /* FDQUEUE_H */
Index: server/mpm/worker/fdqueue.c
===================================================================
RCS file: /home/cvs/httpd-2.0/server/mpm/worker/fdqueue.c,v
retrieving revision 1.22
diff -u -r1.22 fdqueue.c
--- server/mpm/worker/fdqueue.c 29 Apr 2002 01:57:39 -0000 1.22
+++ server/mpm/worker/fdqueue.c 21 May 2002 18:51:42 -0000
@@ -58,138 +58,6 @@
#include "fdqueue.h"
-struct fd_queue_info_t {
- int idlers;
- apr_thread_mutex_t *idlers_mutex;
- apr_thread_cond_t *wait_for_idler;
- int terminated;
- int max_idlers;
- apr_pool_t **recycled_pools;
- int num_recycled;
-};
-
-static apr_status_t queue_info_cleanup(void *data_)
-{
- fd_queue_info_t *qi = data_;
- int i;
- apr_thread_cond_destroy(qi->wait_for_idler);
- apr_thread_mutex_destroy(qi->idlers_mutex);
- for (i = 0; i < qi->num_recycled; i++) {
- apr_pool_destroy(qi->recycled_pools[i]);
- }
- return APR_SUCCESS;
-}
-
-apr_status_t ap_queue_info_create(fd_queue_info_t **queue_info,
- apr_pool_t *pool, int max_idlers)
-{
- apr_status_t rv;
- fd_queue_info_t *qi;
-
- qi = apr_palloc(pool, sizeof(*qi));
- memset(qi, 0, sizeof(*qi));
-
- rv = apr_thread_mutex_create(&qi->idlers_mutex, APR_THREAD_MUTEX_DEFAULT,
- pool);
- if (rv != APR_SUCCESS) {
- return rv;
- }
- rv = apr_thread_cond_create(&qi->wait_for_idler, pool);
- if (rv != APR_SUCCESS) {
- return rv;
- }
- qi->recycled_pools = (apr_pool_t **)apr_palloc(pool, max_idlers *
- sizeof(apr_pool_t *));
- qi->num_recycled = 0;
- qi->max_idlers = max_idlers;
- apr_pool_cleanup_register(pool, qi, queue_info_cleanup,
- apr_pool_cleanup_null);
-
- *queue_info = qi;
-
- return APR_SUCCESS;
-}
-
-apr_status_t ap_queue_info_set_idle(fd_queue_info_t *queue_info,
- apr_pool_t *pool_to_recycle)
-{
- apr_status_t rv;
- rv = apr_thread_mutex_lock(queue_info->idlers_mutex);
- if (rv != APR_SUCCESS) {
- return rv;
- }
- AP_DEBUG_ASSERT(queue_info->idlers >= 0);
- AP_DEBUG_ASSERT(queue_info->num_recycled < queue_info->max_idlers);
- if (pool_to_recycle) {
- queue_info->recycled_pools[queue_info->num_recycled++] =
- pool_to_recycle;
- }
- if (queue_info->idlers++ == 0) {
- /* Only signal if we had no idlers before. */
- apr_thread_cond_signal(queue_info->wait_for_idler);
- }
- rv = apr_thread_mutex_unlock(queue_info->idlers_mutex);
- if (rv != APR_SUCCESS) {
- return rv;
- }
- return APR_SUCCESS;
-}
-
-apr_status_t ap_queue_info_wait_for_idler(fd_queue_info_t *queue_info,
- apr_pool_t **recycled_pool)
-{
- apr_status_t rv;
- *recycled_pool = NULL;
- rv = apr_thread_mutex_lock(queue_info->idlers_mutex);
- if (rv != APR_SUCCESS) {
- return rv;
- }
- AP_DEBUG_ASSERT(queue_info->idlers >= 0);
- while ((queue_info->idlers == 0) && (!queue_info->terminated)) {
- rv = apr_thread_cond_wait(queue_info->wait_for_idler,
- queue_info->idlers_mutex);
- if (rv != APR_SUCCESS) {
- apr_status_t rv2;
- rv2 = apr_thread_mutex_unlock(queue_info->idlers_mutex);
- if (rv2 != APR_SUCCESS) {
- return rv2;
- }
- return rv;
- }
- }
- queue_info->idlers--; /* Oh, and idler? Let's take 'em! */
- if (queue_info->num_recycled) {
- *recycled_pool =
- queue_info->recycled_pools[--queue_info->num_recycled];
- }
- rv = apr_thread_mutex_unlock(queue_info->idlers_mutex);
- if (rv != APR_SUCCESS) {
- return rv;
- }
- else if (queue_info->terminated) {
- return APR_EOF;
- }
- else {
- return APR_SUCCESS;
- }
-}
-
-apr_status_t ap_queue_info_term(fd_queue_info_t *queue_info)
-{
- apr_status_t rv;
- rv = apr_thread_mutex_lock(queue_info->idlers_mutex);
- if (rv != APR_SUCCESS) {
- return rv;
- }
- queue_info->terminated = 1;
- apr_thread_cond_broadcast(queue_info->wait_for_idler);
- rv = apr_thread_mutex_unlock(queue_info->idlers_mutex);
- if (rv != APR_SUCCESS) {
- return rv;
- }
- return APR_SUCCESS;
-}
-
/**
* Detects when the fd_queue_t is full. This utility function is expected
* to be called from within critical sections, and is not threadsafe.
@@ -209,13 +77,17 @@
static apr_status_t ap_queue_destroy(void *data)
{
fd_queue_t *queue = data;
+ int i;
/* Ignore errors here, we can't do anything about them anyway.
* XXX: We should at least try to signal an error here, it is
* indicative of a programmer error. -aaron */
apr_thread_cond_destroy(queue->not_empty);
+ apr_thread_cond_destroy(queue->idlers_available);
apr_thread_mutex_destroy(queue->one_big_mutex);
-
+ for (i = 0; i < queue->num_recycled; i++) {
+ apr_pool_destroy(queue->recycled_pools[i]);
+ }
return APR_SUCCESS;
}
@@ -234,15 +106,25 @@
if ((rv = apr_thread_cond_create(&queue->not_empty, a)) != APR_SUCCESS) {
return rv;
}
+ if ((rv = apr_thread_cond_create(&queue->idlers_available, a)) !=
+ APR_SUCCESS) {
+ return rv;
+ }
queue->data = apr_palloc(a, queue_capacity * sizeof(fd_queue_elem_t));
queue->bounds = queue_capacity;
queue->nelts = 0;
+ queue->terminated = 0;
+ queue->idlers = 0;
/* Set all the sockets in the queue to NULL */
for (i = 0; i < queue_capacity; ++i)
queue->data[i].sd = NULL;
+ queue->recycled_pools = (apr_pool_t **)apr_palloc(a, queue->bounds *
+ sizeof(apr_pool_t *));
+ queue->num_recycled = 0;
+
apr_pool_cleanup_register(a, queue, ap_queue_destroy, apr_pool_cleanup_null);
return APR_SUCCESS;
@@ -285,7 +167,8 @@
* Once retrieved, the socket is placed into the address specified by
* 'sd'.
*/
-apr_status_t ap_queue_pop(fd_queue_t *queue, apr_socket_t **sd, apr_pool_t **p)
+apr_status_t ap_queue_pop(fd_queue_t *queue, apr_socket_t **sd, apr_pool_t **p,
+ apr_pool_t **recycled)
{
fd_queue_elem_t *elem;
apr_status_t rv;
@@ -294,6 +177,21 @@
return rv;
}
+ if (recycled && *recycled) {
+ if (queue->num_recycled == queue->bounds) {
+ apr_pool_destroy(*recycled);
+ }
+ else {
+ queue->recycled_pools[queue->num_recycled++] = *recycled;
+ }
+ *recycled = NULL;
+ }
+
+ queue->idlers++;
+ if (queue->idlers == 1) {
+ apr_thread_cond_signal(queue->idlers_available);
+ }
+
/* Keep waiting until we wake up and find that the queue is not empty. */
if (ap_queue_empty(queue)) {
if (!queue->terminated) {
@@ -301,6 +199,7 @@
}
/* If we wake up and it's still empty, then we were interrupted */
if (ap_queue_empty(queue)) {
+ queue->idlers--;
rv = apr_thread_mutex_unlock(queue->one_big_mutex);
if (rv != APR_SUCCESS) {
return rv;
@@ -322,6 +221,7 @@
elem->p = NULL;
#endif /* AP_DEBUG */
+ queue->idlers--;
rv = apr_thread_mutex_unlock(queue->one_big_mutex);
return rv;
}
@@ -338,6 +238,25 @@
return rv;
}
return APR_SUCCESS;
+}
+
+apr_status_t ap_queue_wait_for_idler(fd_queue_t *queue,
+ apr_pool_t **recycled_pool)
+{
+ apr_status_t rv;
+ if ((rv = apr_thread_mutex_lock(queue->one_big_mutex)) != APR_SUCCESS) {
+ return rv;
+ }
+ while (!queue->terminated && (queue->idlers == 0)) {
+ apr_thread_cond_wait(queue->idlers_available, queue->one_big_mutex);
+ }
+ if (queue->num_recycled > 0) {
+ *recycled_pool = queue->recycled_pools[--queue->num_recycled];
+ }
+ else {
+ *recycled_pool = NULL;
+ }
+ return apr_thread_mutex_unlock(queue->one_big_mutex);
}
apr_status_t ap_queue_term(fd_queue_t *queue)
Index: server/mpm/worker/worker.c
===================================================================
RCS file: /home/cvs/httpd-2.0/server/mpm/worker/worker.c,v
retrieving revision 1.124
diff -u -r1.124 worker.c
--- server/mpm/worker/worker.c 17 May 2002 11:11:39 -0000 1.124
+++ server/mpm/worker/worker.c 21 May 2002 18:51:43 -0000
@@ -173,7 +173,6 @@
static int num_listensocks = 0;
static int resource_shortage = 0;
static fd_queue_t *worker_queue;
-static fd_queue_info_t *worker_queue_info;
/* The structure used to pass unique initialization info to each thread */
typedef struct {
@@ -315,7 +314,6 @@
if (mode == ST_UNGRACEFUL) {
workers_may_exit = 1;
ap_queue_interrupt_all(worker_queue);
- ap_queue_info_term(worker_queue_info);
close_worker_sockets(); /* forcefully kill all current connections */
}
}
@@ -711,8 +709,7 @@
}
if (listener_may_exit) break;
- rv = ap_queue_info_wait_for_idler(worker_queue_info,
- &recycled_pool);
+ rv = ap_queue_wait_for_idler(worker_queue, &recycled_pool);
if (APR_STATUS_IS_EOF(rv)) {
break; /* we've been signaled to die now */
}
@@ -883,22 +880,13 @@
bucket_alloc = apr_bucket_alloc_create(apr_thread_pool_get(thd));
while (!workers_may_exit) {
- rv = ap_queue_info_set_idle(worker_queue_info, last_ptrans);
last_ptrans = NULL;
- if (rv != APR_SUCCESS) {
- ap_log_error(APLOG_MARK, APLOG_EMERG, rv, ap_server_conf,
- "ap_queue_info_set_idle failed. Attempting to "
- "shutdown process gracefully.");
- signal_threads(ST_GRACEFUL);
- break;
- }
-
ap_update_child_status_from_indexes(process_slot, thread_slot, SERVER_READY,
NULL);
worker_pop:
if (workers_may_exit) {
break;
}
- rv = ap_queue_pop(worker_queue, &csd, &ptrans);
+ rv = ap_queue_pop(worker_queue, &csd, &ptrans, &last_ptrans);
if (rv != APR_SUCCESS) {
/* We get APR_EOF during a graceful shutdown once all the connections
@@ -1010,14 +998,6 @@
if (rv != APR_SUCCESS) {
ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
"ap_queue_init() failed");
- clean_child_exit(APEXIT_CHILDFATAL);
- }
-
- rv = ap_queue_info_create(&worker_queue_info, pchild,
- ap_threads_per_child);
- if (rv != APR_SUCCESS) {
- ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
- "ap_queue_info_create() failed");
clean_child_exit(APEXIT_CHILDFATAL);
}