The following reply was made to PR os-solaris/1273; it has been noted by GNATS.
From: Dean Gaudet <[EMAIL PROTECTED]>
To: Steve Schall <[EMAIL PROTECTED]>
Cc: [EMAIL PROTECTED]
Subject: Re: os-solaris/1273: Monitoring system showed system down!
Date: Thu, 6 Nov 1997 02:11:44 -0800 (PST)
It turns out that this is likely an Apache bug. You have two options if
you want to use 1.3b2: either edit src/main/conf.h, search for
SOLARIS, and replace the USE_PTHREAD_SERIALIZED_ACCEPT definition with
USE_FCNTL_SERIALIZED_ACCEPT, or try the included patch. Otherwise, you
can wait for 1.3b3.
Dean
Index: http_main.c
===================================================================
RCS file: /export/home/cvs/apachen/src/main/http_main.c,v
retrieving revision 1.243
diff -u -r1.243 http_main.c
--- http_main.c 1997/11/03 10:11:42 1.243
+++ http_main.c 1997/11/06 06:57:22
@@ -333,18 +333,33 @@
#elif defined (USE_PTHREAD_SERIALIZED_ACCEPT)
/* This code probably only works on Solaris ... but it works really fast
- * on Solaris
+ * on Solaris. Note that pthread mutexes are *NOT* released when a task
+ * dies ... the task has to free it itself. So we block signals and
+ * try to be nice about releasing the mutex.
*/
#include <pthread.h>
-static pthread_mutex_t *accept_mutex;
+static pthread_mutex_t *accept_mutex = (void *)(caddr_t) -1;
+static int have_accept_mutex;
+static sigset_t accept_block_mask;
+static sigset_t accept_previous_mask;
+
+static void accept_mutex_child_cleanup(void *data)
+{
+ if (accept_mutex != (void *)(caddr_t)-1
+ && have_accept_mutex) {
+ pthread_mutex_unlock(accept_mutex);
+ }
+}
static void accept_mutex_cleanup(void)
{
- if (munmap((caddr_t) accept_mutex, sizeof(*accept_mutex))) {
+ if (accept_mutex != (void *)(caddr_t)-1
+ && munmap((caddr_t) accept_mutex, sizeof(*accept_mutex))) {
perror("munmap");
}
+ accept_mutex = (void *)(caddr_t)-1;
}
static void accept_mutex_init(pool *p)
@@ -376,14 +391,25 @@
perror("pthread_mutex_init");
exit(1);
}
+ sigfillset(&accept_block_mask);
+ sigdelset(&accept_block_mask, SIGHUP);
+ sigdelset(&accept_block_mask, SIGTERM);
+ sigdelset(&accept_block_mask, SIGUSR1);
+ register_cleanup(pconf, NULL, accept_mutex_child_cleanup,
+ accept_mutex_child_cleanup);
}
static void accept_mutex_on()
{
+ if (sigprocmask(SIG_BLOCK, &accept_block_mask, &accept_previous_mask)) {
+ perror("sigprocmask(SIG_BLOCK)");
+ exit (1);
+ }
if (pthread_mutex_lock(accept_mutex)) {
perror("pthread_mutex_lock");
exit(1);
}
+ have_accept_mutex = 1;
}
static void accept_mutex_off()
@@ -391,6 +417,24 @@
if (pthread_mutex_unlock(accept_mutex)) {
perror("pthread_mutex_unlock");
exit(1);
+ }
+ /* There is a slight race condition right here... if we were to die right
+ * now, we'd do another pthread_mutex_unlock. Now, doing that would let
+ * another process into the mutex. pthread mutexes are designed to be
+ * fast, as such they don't have protection for things like testing if the
+ * thread owning a mutex is actually unlocking it (or even any way of
+ * testing who owns the mutex).
+ *
+ * If we were to unset have_accept_mutex prior to releasing the mutex
+ * then the race could result in the server unable to serve hits. Doing
+ * it this way means that the server can continue, but an additional
+ * child might be in the critical section ... at least it's still serving
+ * hits.
+ */
+ have_accept_mutex = 0;
+ if (sigprocmask(SIG_SETMASK, &accept_previous_mask, NULL)) {
+ perror("sigprocmask(SIG_SETMASK)");
+ exit (1);
}
}