There are 2 race conditions in peruser.
1) Due to lack of locking multiple Multiplexers exit from poll-ing
listening sockets and only 1 accept it, others will block on accept,
normally this is ok
but on graceful restart these blocked multiplexers stay behind
blocking, the listening sockets sometimes segfaults. With this patch
graceful restarts are not noticeable as they should, without it on our
servers graceful restarts produce around 1 minute downtime (2xdc
opteron with ~600 Processors)
2) same thing in recv_from_multiplexer(), workers get blocked on
recv_msg() and they cant read the pod
The patch reenables locking around pool() for multiplexers only. To
make this working you have to use 'User nobody' in apache config
if your multiplexers run as nobody.
And it fixes 2 by making it non blocking, unlucky workers get back to
the poll(). Maybe this should be fixed by multiplexer sending some
killmessage threw the control pipe while gracefull restart and making
recv_from_multiplexer blocking again, or do some per senv locking.
Unblocking it isn't optimal for performance but it
fixes graceful restarts and I can upp ExpireTimeout significantly
because no children are lost anymore.
--
Michal Grzedzicki
diff -ur server/mpm/experimental/peruser/peruser.c server/mpm/experimental/peruser/peruser.c
--- server/mpm/experimental/peruser/peruser.c 2008-05-12 10:26:35.348640050 +0200
+++ server/mpm/experimental/peruser/peruser.c 2008-05-12 10:57:47.841668046 +0200
@@ -511,10 +511,12 @@
static void accept_mutex_on(void)
{
-/* for some reason this fails if we listen on the pipe_of_death.
- fortunately I don't think we currently need it */
-#if 0
+/* We need it because on multicore machines many multiplexers
+ returns from poll() at once and they get blocked on accept()
+ because only one will get the data, this brakes graceful
+ restarts */
+
apr_status_t rv = apr_proc_mutex_lock(accept_mutex);
if (rv != APR_SUCCESS) {
const char *msg = "couldn't grab the accept mutex";
@@ -529,12 +531,12 @@
exit(APEXIT_CHILDFATAL);
}
}
-#endif
+
}
static void accept_mutex_off(void)
{
-#if 0
+
apr_status_t rv = apr_proc_mutex_unlock(accept_mutex);
if (rv != APR_SUCCESS) {
const char *msg = "couldn't release the accept mutex";
@@ -552,7 +554,6 @@
exit(APEXIT_CHILDFATAL);
}
}
-#endif
}
/* On some architectures it's safe to do unserialized accept()s in the single
@@ -715,8 +716,10 @@
ret = apr_socket_recv(lr->sd, &pipe_read_char, &n);
if (APR_STATUS_IS_EAGAIN(ret))
{
- /* It lost the lottery. It must continue to suffer
- * through a life of servitude. */
+ /* It lost the lottery. It must continue to suffer
+ * through a life of servitude. */
+ _DBG("POD read EAGAIN");
+ return ret;
}
else
{
@@ -1294,13 +1297,24 @@
/* -- receive data from socket -- */
apr_os_sock_get(&ctrl_sock_fd, lr->sd);
_DBG("receiving from sock_fd=%d", ctrl_sock_fd);
- ret = recvmsg(ctrl_sock_fd, &msg, 0);
-
- if(ret == -1)
+
+ //Don't block
+ ret = recvmsg(ctrl_sock_fd, &msg, MSG_DONTWAIT);
+
+ if(ret==-1 && (errno == EAGAIN) ) {
+ //If the message was read by some other worker
+ _DBG("receive_from_multiplexer recvmsg() EAGAIN, someone was faster");
+ return APR_EAGAIN;
+ }
+
+ if(ret == -1) {
_DBG("recvmsg failed with error \"%s\"", strerror(errno));
- else
+ //Error, better kill this child to be on the safe side
+ return APR_EGENERAL;
+ }
+ else {
_DBG("recvmsg returned %d", ret);
-
+ }
/* -- extract socket from the cmsg -- */
memcpy(&trans_sock_fd, CMSG_DATA(cmsg), sizeof(trans_sock_fd));
/* here *trans_sock always == NULL (socket reset at got_fd), so
@@ -1676,8 +1690,9 @@
* Wait for an acceptable connection to arrive.
*/
- /* Lock around "accept", if necessary */
- SAFE_ACCEPT(accept_mutex_on());
+ // only pultiplexers lock on accept(), workers use non blocking i/o
+ if(CHILD_INFO_TABLE[my_child_num].type == CHILD_TYPE_MULTIPLEXER )
+ accept_mutex_on();
if (num_listensocks == 1) {
offset = 0;
@@ -1729,7 +1744,11 @@
* defer the exit
*/
status = listensocks[offset].accept_func((void *)&sock, &listensocks[offset], ptrans);
- SAFE_ACCEPT(accept_mutex_off()); /* unlock after "accept" */
+
+ //Unlock the multiplexers
+ if(CHILD_INFO_TABLE[my_child_num].type == CHILD_TYPE_MULTIPLEXER )
+ accept_mutex_off();
+
if (status == APR_EGENERAL) {
/* resource shortage or should-not-occur occured */
@@ -2173,7 +2192,7 @@
return 1;
}
-#if 0
+
#if APR_USE_SYSVSEM_SERIALIZE
if (ap_accept_lock_mech == APR_LOCK_DEFAULT ||
ap_accept_lock_mech == APR_LOCK_SYSVSEM) {
@@ -2189,7 +2208,6 @@
return 1;
}
}
-#endif
if (!is_graceful) {
if (ap_run_pre_mpm(s->process->pool, SB_SHARED) != OK) {
_______________________________________________
Peruser mailing list
[email protected]
http://www.telana.com/mailman/listinfo/peruser