Hefty, Sean wrote on Thu, 24 Jun 2010 at 11:28:39
> 
> We are likely hitting another issue here.  If thread_id is not reset to
> 0 and not set under the cb_serialize lock, then the check in
> cl_timer_stop will not work reliably.  Moving code around until some
> test case passes isn't the approach we should be using.  Both code
> segments above are racy.  We're dealing with some race conditions that
> aren't going to be easy to reproduce.
> 
> Tzachi successfully identified races in cl_timer.  We need to fix
> those, and if the fallout is that other bugs are more easily exposed,
> with consistent failures, then that's a good thing.

Here's my attempt at a fix for this.  It delays queuing the timer if the 
callback is already running.  It builds, but I have not tested it.  
Couple of things:
- Stopping the timer cannot hold the spinlock while waiting for the timer to be 
deleted.  This also means that by the time the timer is deleted and the code 
takes the lock again, the timer could have been started by another thread.
- the timeout_time is now used in the callback routine to reset the timer if 
non-zero.
- If the thread ID is set, any call to start or trim the timer will simply set 
the timeout_time and return.
- If the timer is stopped, it will block as long as it's not making the call in 
the context of the timer callback.

I believe the code fixes the identified races, and guarantees a single callback 
at a time without holding a lock for the callback.

The attached patch replaces all previous patches to the user-mode timer for 
this issue.  It does not affect the kernel mode timer.  Note that the attached 
patch preserves tabs, but the inline patch to the mail converts them to spaces.

Signed-off-by: Fab Tillier <[email protected]>

Index: core/complib/user/cl_timer.c
===================================================================
--- core/complib/user/cl_timer.c    (revision 2833)
+++ core/complib/user/cl_timer.c    (working copy)
@@ -39,16 +39,54 @@ __timer_callback( 
    IN cl_timer_t* const p_timer,
    IN BOOLEAN timer_signalled )
 {
+   uint64_t time_now;
+   uint32_t time_ms;
+
    /* timer_signalled is always TRUE, and has no value. */
    CL_ASSERT( timer_signalled );
    UNUSED_PARAM( timer_signalled );
 
-   p_timer->timeout_time = 0;
+   cl_spinlock_acquire( &p_timer->spinlock );
    p_timer->thread_id = GetCurrentThreadId();
+   CL_ASSERT( p_timer->thread_id != 0 );
 
-   (p_timer->pfn_callback)( (void*)p_timer->context );
+   do
+   {
+       p_timer->timeout_time = 0;
+       cl_spinlock_release( &p_timer->spinlock );
+
+       (p_timer->pfn_callback)( (void*)p_timer->context );
+
+       cl_spinlock_acquire( &p_timer->spinlock );
+       if( p_timer->timeout_time == 0 )
+       {
+           /* Timer was not reset, we're done here. */
+           break;
+       }
+
+       /* User set the timer again while the callback was running. */
+       time_now = cl_get_time_stamp();
+       if( time_now >= p_timer->timeout_time )
+       {
+           /* Time expired already - invoke the callback. */
+           continue;
+       }
+
+       time_ms = (uint32_t)((p_timer->timeout_time - time_now) / 1000ULL);
+
+       if( !CreateTimerQueueTimer( &p_timer->h_timer, NULL, __timer_callback,
+           p_timer, time_ms, 0, WT_EXECUTEINIOTHREAD ) )
+       {
+           /* Error: just invoke the callback now... */
+           continue;
+       }
+       /* Timer set, break out of the loop and let the timer expire. */
+       break;
+
+   } while( p_timer->timeout_time != 0 );
 
    p_timer->thread_id = 0;
+   cl_spinlock_release( &p_timer->spinlock );
 }
 
 
@@ -57,10 +95,12 @@ cl_timer_construct(
    IN  cl_timer_t* const   p_timer )
 {
    p_timer->h_timer = NULL;
+   p_timer->pfn_callback = NULL;
    p_timer->timeout_time = 0;
    p_timer->thread_id = 0;
-}
 
+   cl_spinlock_construct( &p_timer->spinlock );
+}
 
 
 cl_status_t
@@ -76,7 +116,7 @@ cl_timer_init( 
 
    p_timer->pfn_callback = pfn_callback;
    p_timer->context = context;
-   return( CL_SUCCESS );
+   return cl_spinlock_init(&p_timer->spinlock);
 }
 
 
@@ -87,48 +127,99 @@ cl_timer_destroy(
    CL_ASSERT( p_timer );
    
    cl_timer_stop( p_timer );
+   cl_spinlock_destroy( &p_timer->spinlock );
 }
 
 
-cl_status_t 
-cl_timer_start(
+static void
+__cl_timer_stop_locked(
+   IN  cl_timer_t* const   p_timer
+   )
+{
+   /*
+    * We must loop here, because we release the lock to delete the timer, and
+    * a different thread might start it before we wakeup and get the lock.
+    */
+   while( p_timer->h_timer )
+   {
+       /* Timer already queued. */
+       HANDLE h_timer = p_timer->h_timer;
+       p_timer->h_timer = NULL;
+       /* Make sure we block until the timer is cancelled. */
+       cl_spinlock_release( &p_timer->spinlock );
+       DeleteTimerQueueTimer( NULL, h_timer, INVALID_HANDLE_VALUE );
+       cl_spinlock_acquire( &p_timer->spinlock );
+   }
+}
+
+
+static cl_status_t
+__cl_timer_start_locked(
    IN  cl_timer_t* const   p_timer,
-   IN  const uint32_t      time_ms )
+   IN  const uint32_t      time_ms,
+   IN  const uint64_t      timeout_time )
 {
-   CL_ASSERT( p_timer );
+   p_timer->timeout_time = timeout_time;
 
-   cl_timer_stop( p_timer );
+   if( p_timer->thread_id != 0 )
+   {
+       /* Timer callback executing. */
+       return( CL_SUCCESS );
+   }
 
-   p_timer->timeout_time = cl_get_time_stamp() + (((uint64_t)time_ms) * 1000);
+   __cl_timer_stop_locked( p_timer );
 
    if( !CreateTimerQueueTimer( &p_timer->h_timer, NULL, __timer_callback,
        p_timer, time_ms, 0, WT_EXECUTEINIOTHREAD ) )
    {
        return( CL_ERROR );
    }
-   
    return( CL_SUCCESS );
 }
 
 
 cl_status_t 
+cl_timer_start(
+   IN  cl_timer_t* const   p_timer,
+   IN  const uint32_t      time_ms )
+{
+   cl_status_t status = CL_SUCCESS;
+   CL_ASSERT( p_timer );
+
+   cl_spinlock_acquire(&p_timer->spinlock);
+   status = __cl_timer_start_locked(
+       p_timer,
+       time_ms,
+       cl_get_time_stamp() + (((uint64_t)time_ms) * 1000)
+       );
+   cl_spinlock_release(&p_timer->spinlock);
+   return status;
+}
+
+
+cl_status_t 
 cl_timer_trim(
    IN  cl_timer_t* const   p_timer,
    IN  const uint32_t      time_ms )
 {
    uint64_t        timeout_time;
+   cl_status_t     status = CL_SUCCESS;
 
    CL_ASSERT( p_timer );
    CL_ASSERT( p_timer->pfn_callback );
 
+   cl_spinlock_acquire(&p_timer->spinlock);
+
    /* Calculate the timeout time in the timer object. */
    timeout_time = cl_get_time_stamp() + (((uint64_t)time_ms) * 1000);
 
    /* Only pull in the timeout time. */
-   if( p_timer->timeout_time && p_timer->timeout_time < timeout_time )
-       return( CL_SUCCESS );
-
-   return cl_timer_start( p_timer, time_ms );
+   if( p_timer->timeout_time > timeout_time )
+   {
+       status = __cl_timer_start_locked( p_timer, time_ms, timeout_time );
+   }
+   cl_spinlock_release(&p_timer->spinlock);
+   return status;
 }
 
 
@@ -137,14 +228,15 @@ cl_timer_stop(
    IN  cl_timer_t* const   p_timer )
 {
    CL_ASSERT( p_timer );
+   cl_spinlock_acquire(&p_timer->spinlock);
 
-   if( p_timer->h_timer && p_timer->thread_id != GetCurrentThreadId() )
+   if( p_timer->thread_id != GetCurrentThreadId() )
    {
-       /* Make sure we block until the timer is cancelled. */
-       DeleteTimerQueueTimer( NULL, p_timer->h_timer, INVALID_HANDLE_VALUE );
-       p_timer->h_timer = NULL;
+       __cl_timer_stop_locked( p_timer );
    }
    p_timer->timeout_time = 0;
+   
+   cl_spinlock_release(&p_timer->spinlock);
 }
 
 
Index: inc/user/complib/cl_timer_osd.h
===================================================================
--- inc/user/complib/cl_timer_osd.h (revision 2833)
+++ inc/user/complib/cl_timer_osd.h (working copy)
@@ -37,6 +37,7 @@
 
 
 #include "cl_types.h"
+#include "cl_spinlock.h"
 
 
 /* Timer object definition. */
@@ -47,8 +48,9 @@ typedef struct _cl_timer
    const void              *context;
    uint64_t                timeout_time;
    DWORD                   thread_id;
+   cl_spinlock_t           spinlock;
 
 } cl_timer_t;
 
 
-#endif // _CL_TIMER_OSD_H_
\ No newline at end of file
+#endif // _CL_TIMER_OSD_H_

Attachment: cl_timer.patch
Description: cl_timer.patch

_______________________________________________
ofw mailing list
[email protected]
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ofw

Reply via email to