Hi Rainer, sorry for the delay.
On Wed, Apr 19, 2017 at 1:36 PM, Rainer Jung <rainer.j...@kippdata.de> wrote: > > The wait_usec loop tying the apr_proc_mutex_timedlock() very often succeeds > already during the first or second, sometimes the third attempt. But then > the outer loop "while (i < MAX_ITER)" suddenly starts to fail because the > inner loop switches behavior and apr_proc_mutex_timedlock() always gets the > timeout until we stop trying due to wait_usec. Increasing MAX_WAIT_USEC from > 1 sec to 5 sec didn't help. I have no idea, what the root cause for this > behavior switching is. I think what might happen is some pthread_cond_timedwait() spurious wakeup (which I thought was avoided by counting the number of waiters), or another possibility is a wakeup with timeout whereas the cond is simultaneously signaled. I both cases this could lead to a desynchronization between proc_mutex_pthread_acquire_ex() and proc_mutex_pthread_release(). The attached patch prevents this by looping on pthread_cond_[timed]wait() until the condition is satisfied. Would you please try it? Regards, Yann.
Index: locks/unix/proc_mutex.c =================================================================== --- locks/unix/proc_mutex.c (revision 1791915) +++ locks/unix/proc_mutex.c (working copy) @@ -21,6 +21,8 @@ #include "apr_md5.h" /* for apr_md5() */ #include "apr_atomic.h" +/* #undef HAVE_PTHREAD_MUTEX_TIMEDLOCK */ + APR_DECLARE(apr_status_t) apr_proc_mutex_destroy(apr_proc_mutex_t *mutex) { apr_status_t rv = apr_proc_mutex_cleanup(mutex); @@ -701,30 +703,32 @@ static apr_status_t proc_mutex_pthread_acquire_ex( return rv; } - if (!proc_pthread_mutex_cond_locked(mutex)) { - proc_pthread_mutex_cond_locked(mutex) = 1; + if (!timeout) { + rv = proc_pthread_mutex_cond_locked(mutex) ? APR_TIMEUP + : APR_SUCCESS; } - else if (!timeout) { - rv = APR_TIMEUP; - } - else { + else if (proc_pthread_mutex_cond_locked(mutex)) { + struct timespec abstime; + + if (timeout > 0) { + timeout += apr_time_now(); + abstime.tv_sec = apr_time_sec(timeout); + abstime.tv_nsec = apr_time_usec(timeout) * 1000; /* nsecs */ + } + proc_pthread_mutex_cond_num_waiters(mutex)++; + do { if (timeout < 0) { rv = pthread_cond_wait(&proc_pthread_mutex_cond(mutex), &proc_pthread_mutex(mutex)); + if (rv) { #ifdef HAVE_ZOS_PTHREADS - if (rv) { rv = errno; +#endif + break; } -#endif } else { - struct timespec abstime; - - timeout += apr_time_now(); - abstime.tv_sec = apr_time_sec(timeout); - abstime.tv_nsec = apr_time_usec(timeout) * 1000; /* nanoseconds */ - rv = pthread_cond_timedwait(&proc_pthread_mutex_cond(mutex), &proc_pthread_mutex(mutex), &abstime); @@ -735,8 +739,10 @@ static apr_status_t proc_mutex_pthread_acquire_ex( if (rv == ETIMEDOUT) { rv = APR_TIMEUP; } + break; } } + } while (proc_pthread_mutex_cond_locked(mutex)); proc_pthread_mutex_cond_num_waiters(mutex)--; } if (rv) { @@ -743,6 +749,7 @@ static apr_status_t proc_mutex_pthread_acquire_ex( pthread_mutex_unlock(&proc_pthread_mutex(mutex)); return rv; } + proc_pthread_mutex_cond_locked(mutex) = 1; rv = pthread_mutex_unlock(&proc_pthread_mutex(mutex)); if (rv) { @@ -852,25 +859,29 @@ static apr_status_t proc_mutex_pthread_release(apr return rv; } - if (!proc_pthread_mutex_cond_locked(mutex)) { - rv = APR_EINVAL; + if (proc_pthread_mutex_cond_locked(mutex)) { + if (proc_pthread_mutex_cond_num_waiters(mutex)) { + rv = pthread_cond_signal(&proc_pthread_mutex_cond(mutex)); + if (rv) { +#ifdef HAVE_ZOS_PTHREADS + rv = errno; +#endif + pthread_mutex_unlock(&proc_pthread_mutex(mutex)); + return rv; + } + } + proc_pthread_mutex_cond_locked(mutex) = 0; } - else if (proc_pthread_mutex_cond_num_waiters(mutex)) { - rv = pthread_cond_signal(&proc_pthread_mutex_cond(mutex)); + else { + rv = pthread_mutex_unlock(&proc_pthread_mutex(mutex)); if (rv) { #ifdef HAVE_ZOS_PTHREADS rv = errno; #endif + return rv; } + return APR_EINVAL; } - else { - proc_pthread_mutex_cond_locked(mutex) = 0; - rv = APR_SUCCESS; - } - if (rv) { - pthread_mutex_unlock(&proc_pthread_mutex(mutex)); - return rv; - } } #endif /* APR_USE_PROC_PTHREAD_MUTEX_COND */