Hi Rainer,

sorry for the delay.

On Wed, Apr 19, 2017 at 1:36 PM, Rainer Jung <rainer.j...@kippdata.de> wrote:
>
> The wait_usec loop tying the apr_proc_mutex_timedlock() very often succeeds
> already during the first or second, sometimes the third attempt. But then
> the outer loop "while (i < MAX_ITER)" suddenly starts to fail because the
> inner loop switches behavior and apr_proc_mutex_timedlock() always gets the
> timeout until we stop trying due to wait_usec. Increasing MAX_WAIT_USEC from
> 1 sec to 5 sec didn't help. I have no idea, what the root cause for this
> behavior switching is.

I think what might happen is some pthread_cond_timedwait() spurious
wakeup (which I thought was avoided by counting the number of
waiters), or another possibility is a wakeup with timeout whereas the
cond is simultaneously signaled.

I both cases this could lead to a desynchronization between
proc_mutex_pthread_acquire_ex() and proc_mutex_pthread_release().

The attached patch prevents this by looping on
pthread_cond_[timed]wait() until the condition is satisfied.
Would you please try it?


Regards,
Yann.
Index: locks/unix/proc_mutex.c
===================================================================
--- locks/unix/proc_mutex.c	(revision 1791915)
+++ locks/unix/proc_mutex.c	(working copy)
@@ -21,6 +21,8 @@
 #include "apr_md5.h" /* for apr_md5() */
 #include "apr_atomic.h"
 
+/* #undef HAVE_PTHREAD_MUTEX_TIMEDLOCK */
+
 APR_DECLARE(apr_status_t) apr_proc_mutex_destroy(apr_proc_mutex_t *mutex)
 {
     apr_status_t rv = apr_proc_mutex_cleanup(mutex);
@@ -701,30 +703,32 @@ static apr_status_t proc_mutex_pthread_acquire_ex(
             return rv;
         }
 
-        if (!proc_pthread_mutex_cond_locked(mutex)) {
-            proc_pthread_mutex_cond_locked(mutex) = 1;
+        if (!timeout) {
+            rv = proc_pthread_mutex_cond_locked(mutex) ? APR_TIMEUP
+                                                       : APR_SUCCESS;
         }
-        else if (!timeout) {
-            rv = APR_TIMEUP;
-        }
-        else {
+        else if (proc_pthread_mutex_cond_locked(mutex)) {
+            struct timespec abstime;
+
+            if (timeout > 0) {
+                timeout += apr_time_now();
+                abstime.tv_sec = apr_time_sec(timeout);
+                abstime.tv_nsec = apr_time_usec(timeout) * 1000; /* nsecs */
+            }
+
             proc_pthread_mutex_cond_num_waiters(mutex)++;
+            do {
             if (timeout < 0) {
                 rv = pthread_cond_wait(&proc_pthread_mutex_cond(mutex),
                                        &proc_pthread_mutex(mutex));
+                if (rv) {
 #ifdef HAVE_ZOS_PTHREADS
-                if (rv) {
                     rv = errno;
+#endif
+                    break;
                 }
-#endif
             }
             else {
-                struct timespec abstime;
-
-                timeout += apr_time_now();
-                abstime.tv_sec = apr_time_sec(timeout);
-                abstime.tv_nsec = apr_time_usec(timeout) * 1000; /* nanoseconds */
-
                 rv = pthread_cond_timedwait(&proc_pthread_mutex_cond(mutex),
                                             &proc_pthread_mutex(mutex),
                                             &abstime);
@@ -735,8 +739,10 @@ static apr_status_t proc_mutex_pthread_acquire_ex(
                     if (rv == ETIMEDOUT) {
                         rv = APR_TIMEUP;
                     }
+                    break;
                 }
             }
+            } while (proc_pthread_mutex_cond_locked(mutex));
             proc_pthread_mutex_cond_num_waiters(mutex)--;
         }
         if (rv) {
@@ -743,6 +749,7 @@ static apr_status_t proc_mutex_pthread_acquire_ex(
             pthread_mutex_unlock(&proc_pthread_mutex(mutex));
             return rv;
         }
+        proc_pthread_mutex_cond_locked(mutex) = 1;
 
         rv = pthread_mutex_unlock(&proc_pthread_mutex(mutex));
         if (rv) {
@@ -852,25 +859,29 @@ static apr_status_t proc_mutex_pthread_release(apr
             return rv;
         }
 
-        if (!proc_pthread_mutex_cond_locked(mutex)) {
-            rv = APR_EINVAL;
+        if (proc_pthread_mutex_cond_locked(mutex)) {
+            if (proc_pthread_mutex_cond_num_waiters(mutex)) {
+                rv = pthread_cond_signal(&proc_pthread_mutex_cond(mutex));
+                if (rv) {
+#ifdef HAVE_ZOS_PTHREADS
+                    rv = errno;
+#endif
+                    pthread_mutex_unlock(&proc_pthread_mutex(mutex));
+                    return rv;
+                }
+            }
+            proc_pthread_mutex_cond_locked(mutex) = 0;
         }
-        else if (proc_pthread_mutex_cond_num_waiters(mutex)) {
-            rv = pthread_cond_signal(&proc_pthread_mutex_cond(mutex));
+        else {
+            rv = pthread_mutex_unlock(&proc_pthread_mutex(mutex));
             if (rv) {
 #ifdef HAVE_ZOS_PTHREADS
                 rv = errno;
 #endif
+                return rv;
             }
+            return APR_EINVAL;
         }
-        else {
-            proc_pthread_mutex_cond_locked(mutex) = 0;
-            rv = APR_SUCCESS;
-        }
-        if (rv) {
-            pthread_mutex_unlock(&proc_pthread_mutex(mutex));
-            return rv;
-        }
     }
 #endif /* APR_USE_PROC_PTHREAD_MUTEX_COND */
 

Reply via email to