Ok, so HTML format mails suck for responding to patches...

timer.patch:

>Index: a/core/complib/user/cl_timer.c
>===================================================================
>--- a/core/complib/user/cl_timer.c     (revision 5977)
>+++ b/core/complib/user/cl_timer.c     (revision 5978)
>@@ -33,25 +33,27 @@
> 
> #include "complib/cl_timer.h"
> 
>-
> static void CALLBACK
> __timer_callback( 
>       IN cl_timer_t* const p_timer,
>       IN BOOLEAN timer_signalled )
> {
>       /* timer_signalled is always TRUE, and has no value. */
>+      
>+      DWORD thread_id = GetCurrentThreadId();
>       CL_ASSERT( timer_signalled );
>+

Don't need this blank line.

>       UNUSED_PARAM( timer_signalled );
> 
>+      CL_ASSERT(thread_id!=0);

Move this assert down, reference p_timer->thread_id, and you can eliminate the 
local variable.

>+
>       p_timer->timeout_time = 0;
>-      p_timer->thread_id = GetCurrentThreadId();
>+      p_timer->thread_id = thread_id;
> 
>       (p_timer->pfn_callback)( (void*)p_timer->context );
> 
>-      p_timer->thread_id = 0;
> }
> 
>-
> void
> cl_timer_construct(
>       IN      cl_timer_t* const       p_timer )

Where's the call to cl_spinlock_construct?

 >Index: a/inc/complib/cl_timer.h
>===================================================================
>--- a/inc/complib/cl_timer.h   (revision 5977)
>+++ b/inc/complib/cl_timer.h   (revision 5978)
>@@ -45,8 +45,8 @@
> 
> 
> #include <complib/cl_types.h>
>+#include <complib/cl_spinlock.h>

This belongs in the cl_timer_osd.h, not here.


cl_timer.c.patch & cl_timer_osd.h should have been a single patch, as they are 
related.  No need for per-file patches.

>--- a/core/complib/user/cl_timer.c     Wed Jun 23 11:09:38 2010
>+++ b/core/complib/user/cl_timer.c     Wed Jun 23 11:13:04 2010
>@@ -59,6 +60,7 @@
>       IN      cl_timer_t* const       p_timer )
> {
>       p_timer->h_timer = NULL;
>+      p_timer->pfn_callback = NULL;
>       p_timer->timeout_time = 0;
>       p_timer->thread_id = 0;

Where's the call to cl_spinlock_construct?

> }
>@@ -89,6 +96,12 @@
>       CL_ASSERT( p_timer );
>       
>       cl_timer_stop( p_timer );
>+      if ( p_timer->pfn_callback )

Don't use the callback as a flag.  If you properly construct the spinlocks, you 
don't need the check.

>+      {
>+              /* construct does not set these, _init does */

construct should construct, init should init.

>+              cl_spinlock_destroy( &p_timer->spinlock );
>+              cl_spinlock_destroy( &p_timer->cb_serialize );
>+      }
> }
> 
>

kernel-timer.patch:

>diff --git a/trunk/core/complib/kernel/cl_timer.c 
>b/trunk/core/complib/kernel/cl_timer.c
>index 34cae8e..ecee4a6 100644
>--- a/trunk/core/complib/kernel/cl_timer.c
>+++ b/trunk/core/complib/kernel/cl_timer.c
>@@ -45,9 +45,13 @@ __timer_callback(
>       UNUSED_PARAM( arg1 );
>       UNUSED_PARAM( arg2 );
> 
>+      cl_spinlock_acquire( &p_timer->spinlock );
>       p_timer->timeout_time = 0;

The only case where you could end up with multiple callbacks executing is if 
you call cl_timer_start from the callback.  It might be easier to flag that 
you're in a callback, store the new timeout time, and delay calling KeSetTimer 
until the callback unwinds.  You could add a flag here to indicate that you're 
in the timer callback, that you check in cl_timer_start to control whether you 
call KeSetTimer...

>+      cl_spinlock_release( &p_timer->spinlock );
> 
>+      KeAcquireGuardedMutex( &p_timer->cb_lock );

timer callback is in a DPC, at DISPATCH_LEVEL.  KeAcquireGuardedMutex must be 
<= APC_LEVEL.

>       (p_timer->pfn_callback)( (void*)p_timer->context );

Check the timeout here, and if non-zero, call KeSetTimer, then clear the 
callback flag.  Note that similar logic could be used in the user-mode timer to 
prevent multiple timer callbacks from running concurrently, while also avoiding 
extra threads spinning on the callback spinlock.

>+      KeReleaseGuardedMutex( &p_timer->cb_lock );
> }
> 
> 
>@@ -75,6 +79,8 @@ cl_timer_init(
> 
>       KeInitializeTimer( &p_timer->timer );
>       KeInitializeDpc( &p_timer->dpc, __timer_callback, p_timer );
>+      cl_spinlock_init( &p_timer->spinlock );

Just use a KSPINLOCK here, no?  Then you can use the in-stack queued spinlocks 
if the lock might be contentious.

>+      KeInitializeGuardedMutex( &p_timer->cb_lock );
> 
>       return( CL_SUCCESS );
> }
>@@ -108,6 +114,7 @@ cl_timer_start(
>       IN      const uint32_t          time_ms )
> {
>       LARGE_INTEGER   due_time;
>+      uint64_t                timeout_time;
> 
>       CL_ASSERT( p_timer );
>       CL_ASSERT( p_timer->pfn_callback );
>@@ -115,11 +122,13 @@ cl_timer_start(
> 
>       /* Due time is in 100 ns increments.  Negative for relative time. */
>       due_time.QuadPart = -(int64_t)(((uint64_t)time_ms) * 10000);
>+      timeout_time = cl_get_time_stamp() + (((uint64_t)time_ms) * 1000);
> 
>-      /* Store the timeout time in the timer object. */
>-      p_timer->timeout_time = cl_get_time_stamp() + (((uint64_t)time_ms) * 
>1000);
>-
>+      cl_spinlock_acquire( &p_timer->spinlock );
>+      p_timer->timeout_time = timeout_time;

Check the 'in callback' flag here to conditionally call KeSetTimer.

>       KeSetTimer( &p_timer->timer, due_time, &p_timer->dpc );
>+      cl_spinlock_release( &p_timer->spinlock );
>+
>       return( CL_SUCCESS );
> }
> 
>@@ -129,19 +138,25 @@ cl_timer_trim(
>       IN      cl_timer_t* const       p_timer,
>       IN      const uint32_t          time_ms )
> {
>+      LARGE_INTEGER   due_time;
>       uint64_t                timeout_time;
> 
>       CL_ASSERT( p_timer );
>       CL_ASSERT( p_timer->pfn_callback );
> 
>-      /* Calculate the timeout time in the timer object. */
>+      /* Due time is in 100 ns increments.  Negative for relative time. */
>+      due_time.QuadPart = -(int64_t)(((uint64_t)time_ms) * 10000);

You can delay calculating this until the check that the new timeout time is 
before the current timeout time.

>       timeout_time = cl_get_time_stamp() + (((uint64_t)time_ms) * 1000);
> 
>       /* Only pull in the timeout time. */
>-      if( p_timer->timeout_time && p_timer->timeout_time < timeout_time )
>-              return( CL_SUCCESS );
>-
>-      return cl_timer_start( p_timer, time_ms );
>+      cl_spinlock_acquire( &p_timer->spinlock );
>+      if( !p_timer->timeout_time || p_timer->timeout_time > timeout_time )
>+      {
>+              p_timer->timeout_time = timeout_time;

Same here, check the 'in callback' flag and defer calling KeSetTimer if set.

>+              KeSetTimer( &p_timer->timer, due_time, &p_timer->dpc );
>+      }
>+      cl_spinlock_release( &p_timer->spinlock );
>+      return( CL_SUCCESS );
> }
> 
> 
>diff --git a/trunk/inc/kernel/complib/cl_timer_osd.h 
>b/trunk/inc/kernel/complib/cl_timer_osd.h
>index 4cb99b0..6817a90 100644
>--- a/trunk/inc/kernel/complib/cl_timer_osd.h
>+++ b/trunk/inc/kernel/complib/cl_timer_osd.h
>@@ -38,7 +38,7 @@
> 
> 
> #include "complib/cl_types.h"
>-
>+#include "complib/cl_spinlock.h"
> 
> /* Timer object definition. */
> typedef struct _cl_timer
>@@ -48,6 +48,8 @@ typedef struct _cl_timer
>       cl_pfn_timer_callback_t pfn_callback;
>       const void                              *context;
>       uint64_t                                timeout_time;
>+      cl_spinlock_t                   spinlock;
>+      KGUARDED_MUTEX                  cb_lock;
> 
> } cl_timer_t;
> 
>
>

Smith, Stan wrote on Wed, 23 Jun 2010 at 11:46:39

> From: Smith, Stan [mailto:[email protected]]
> Sent: Wednesday, June 23, 2010 11:47 AM
> To: Tzachi Dar; [email protected]
> Cc: Yevgeny Kliteynik; Uri Habusha; Fab Tillier
> Subject: RE: [ofw] patch: Fix a race in the cl_timer code that caused
> deadlocks in opensm
> 
> Separated patch files so cut-n-paste into a single patch file did not
> introduce any problems.
> From svn trunk\
> Apply your timer patch as patch -p1 < timer.patch
> Apply:
>    patch -p1 < cl_timer_osd.h.patch    user-mode serialize
>    patch -p1 < cl_timer.c.patch           user-mode serialize
>    patch -p2 < kernel_timer.patch       sean's kernel patches
>    
>    if using opensm 3.3.6 (vnedor-umad) patch -p1 < umad.cpp.patch
> 
_______________________________________________
ofw mailing list
[email protected]
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ofw

Reply via email to