Roi Barkan
Mon, 14 Jul 2008 07:39:22 -0700
Hi,
I'd like to report a bug I've found (I think) in the rwlock on windows.
Take a look at the following lines from the apr_thread_rwlock_unlock()
(trunk/locks/win32/thread_rwlock.c):
...
146 : /* Nope, we must have a read lock */
147 : if (rwlock->readers &&
148 : ! InterlockedDecrement(&rwlock->readers)
&&
149 : ! SetEvent(rwlock->read_event)) {
....
Notice that no mutex is taken during apr_thread_rwlock_unlock().
Now, consider a single reader is performing an unlock, called
InterlockedDecrement() on line 148, received 0 as a result of
InterlockedDecrement, and a context switch now happens right before the
call to SetEvent() in line 149.
Now, let's say that right after the context switch another thread tries
to take the read-lock. Here's the implementation of
apr_thread_rwlock_rdlock_core():
...
65 : DWORD code =
WaitForSingleObject(rwlock->write_mutex, milliseconds);
66 :
67 : if (code == WAIT_FAILED || code ==
WAIT_TIMEOUT)
68 : return APR_FROM_OS_ERROR(code);
69 :
70 : /* We've successfully acquired the writer
mutex, we can't be locked
71 : * for write, so it's OK to add the reader lock.
The writer mutex
72 : * doubles as race condition protection for the
readers counter.
73 : */
74 : InterlockedIncrement(&rwlock->readers);
75 :
76 : if (! ResetEvent(rwlock->read_event))
77 : return apr_get_os_error();
78 :
79 : if (! ReleaseMutex(rwlock->write_mutex))
80 : return apr_get_os_error();
81 :
82 : return APR_SUCCESS;
...
After this thread is done taking the lock, the readers counter is equal
to 1, and the read_event is RESET.
Now, the system might context-switch back to the original thread doing
an unlock operation (at line 149) which will, unfortunately, SET the
event.
If you've followed me so far, we've reached a situation where one thread
has the read-lock, the readers count is equal to 1 but the read_event is
SET.
Now, if any thread will try to acquire a writer's lock, it will SUCCEED,
and not wait for the reader thread to release its reader-lock.
As you can see - this is a race between two readers, which allows a
writer to take the lock while a reader has it.
The comment in lines 71-72 is mesleading and isn't true.
As far as I can see, this issue was introduced 5 years ago, seen on
revision 64541, and is part of all APR versions since 0.9.4.
Below is my suggestion for a fix for this issue, as well as an addition
to the rwlock test which should cause the issue (if you're [un]lucky
enough).
Thanks,
Roi
------------------------------------------------------------------------
--------
Index: include/arch/win32/apr_arch_thread_rwlock.h
===================================================================
--- include/arch/win32/apr_arch_thread_rwlock.h
+++ include/arch/win32/apr_arch_thread_rwlock.h
@@ -22,6 +22,11 @@
struct apr_thread_rwlock_t {
apr_pool_t *pool;
HANDLE write_mutex;
+#if APR_HAS_UNICODE_FS
+ CRITICAL_SECTION read_section;
+#else
+ HANDLE read_mutex;
+#endif
HANDLE read_event;
LONG readers;
};
Index: locks/win32/thread_rwlock.c
===================================================================
--- locks/win32/thread_rwlock.c
+++ locks/win32/thread_rwlock.c
@@ -28,9 +28,16 @@
if (! CloseHandle(rwlock->read_event))
return apr_get_os_error();
- if (! CloseHandle(rwlock->write_mutex))
- return apr_get_os_error();
-
+#if APR_HAS_UNICODE_FS
+ DeleteCriticalSection(&rwlock->read_section);
+#else
+ if (! CloseHandle(rwlock->read_mutex))
+ return apr_get_os_error();
+#endif
+
+ if (! CloseHandle(rwlock->write_mutex))
+ return apr_get_os_error();
+
return APR_SUCCESS;
}
@@ -47,12 +54,27 @@
return apr_get_os_error();
}
- if (! ((*rwlock)->write_mutex = CreateMutex(NULL, FALSE, NULL))) {
+#if APR_HAS_UNICODE_FS
+ InitializeCriticalSection(&(*rwlock)->read_section);
+#else
+ if (! ((*rwlock)->read_mutex = CreateMutex(NULL, FALSE, NULL))) {
CloseHandle((*rwlock)->read_event);
*rwlock = NULL;
return apr_get_os_error();
}
+#endif
+ if (! ((*rwlock)->write_mutex = CreateMutex(NULL, FALSE, NULL)))
{
+ CloseHandle((*rwlock)->read_event);
+#if APR_HAS_UNICODE_FS
+ DeleteCriticalSection(&(*rwlock)->read_section);
+#else
+ CloseHandle((*rwlock)->read_mutex);
+#endif
+ *rwlock = NULL;
+ return apr_get_os_error();
+ }
+
apr_pool_cleanup_register(pool, *rwlock, thread_rwlock_cleanup,
apr_pool_cleanup_null);
@@ -62,24 +84,50 @@
static apr_status_t apr_thread_rwlock_rdlock_core(apr_thread_rwlock_t
*rwlock,
DWORD milliseconds)
{
+ apr_status_t rv = APR_SUCCESS;
DWORD code = WaitForSingleObject(rwlock->write_mutex,
milliseconds);
if (code == WAIT_FAILED || code == WAIT_TIMEOUT)
return APR_FROM_OS_ERROR(code);
- /* We've successfully acquired the writer mutex, we can't be locked
- * for write, so it's OK to add the reader lock. The writer mutex
- * doubles as race condition protection for the readers counter.
- */
+#if APR_HAS_UNICODE_FS
+ code = WAIT_OBJECT_0;
+ if (0 == milliseconds)
+ {
+ if (!TryEnterCriticalSection(&rwlock->read_section))
+ {
+ code = WAIT_TIMEOUT;
+ }
+ }
+ else
+ {
+ EnterCriticalSection(&rwlock->read_section);
+ }
+#else
+ code = WaitForSingleObject(rwlock->read_mutex, milliseconds);
#endif
+ if (code == WAIT_FAILED || code == WAIT_TIMEOUT)
+ {
+ ReleaseMutex(rwlock->write_mutex);
+ return APR_FROM_OS_ERROR(code);
+ }
+
InterlockedIncrement(&rwlock->readers);
if (! ResetEvent(rwlock->read_event))
- return apr_get_os_error();
+ rv = apr_get_os_error();
- if (! ReleaseMutex(rwlock->write_mutex))
- return apr_get_os_error();
-
- return APR_SUCCESS;
+#if APR_HAS_UNICODE_FS
+ LeaveCriticalSection(&rwlock->read_section);
+#else
+ if (! ReleaseMutex(rwlock->read_mutex))
+ rv = (rv == APR_SUCCESS) ? apr_get_os_error() : rv;
#endif
+
+ if (! ReleaseMutex(rwlock->write_mutex))
+ rv = (rv == APR_SUCCESS) ? apr_get_os_error() : rv;
+
+ return rv;
}
APR_DECLARE(apr_status_t) apr_thread_rwlock_rdlock(apr_thread_rwlock_t
*rwlock) @@ -144,6 +192,17 @@
if (rv == APR_FROM_OS_ERROR(ERROR_NOT_OWNER)) {
/* Nope, we must have a read lock */
+ DWORD code = WAIT_OBJECT_0;
+#if APR_HAS_UNICODE_FS
+ EnterCriticalSection(&rwlock->read_section);
+#else
+ code = WaitForSingleObject(rwlock->read_mutex,
INFINITE); #endif
+
+ if (code == WAIT_FAILED || code == WAIT_TIMEOUT)
+ {
+ return APR_FROM_OS_ERROR(code);
+ }
if (rwlock->readers &&
! InterlockedDecrement(&rwlock->readers) &&
! SetEvent(rwlock->read_event)) { @@ -152,6 +211,12 @@
else {
rv = 0;
}
+#if APR_HAS_UNICODE_FS
+ LeaveCriticalSection(&rwlock->read_section);
+#else
+ if (! ReleaseMutex(rwlock->read_mutex))
+ rv = (rv == APR_SUCCESS) ? apr_get_os_error() :
rv; #endif
}
return rv;
Index: test/testlock.c
===================================================================
--- test/testlock.c
+++ test/testlock.c
@@ -37,7 +37,7 @@
static apr_thread_mutex_t *thread_mutex; static apr_thread_rwlock_t
*rwlock; -static int i = 0, x = 0;
+static int i = 0, x = 0, absolute_error = 0;
static int buff[MAX_COUNTER];
@@ -62,7 +62,10 @@
while (1)
{
+ int this_error = 0;
apr_thread_rwlock_rdlock(rwlock);
+ this_error = x - i;
+ absolute_error += this_error >= 0 ? this_error : -this_error;
if (i == MAX_ITER)
exitLoop = 0;
apr_thread_rwlock_unlock(rwlock); @@ -160,6 +163,7 @@
i = 0;
x = 0;
+ absolute_error = 0;
s1 = apr_thread_create(&t1, NULL, thread_mutex_function, NULL, p);
ABTS_INT_EQUAL(tc, APR_SUCCESS, s1); @@ -176,6 +180,7 @@
apr_thread_join(&s4, t4);
ABTS_INT_EQUAL(tc, MAX_ITER, x);
+ ABTS_INT_EQUAL(tc, 0, absolute_error);
}
static void test_thread_rwlock(abts_case *tc, void *data)
Protected by Websense Messaging Security -- www.websense.com