Hi Alex,
  When were you going to commit this patch?

thanks,

stan.

________________________________
From: [email protected] 
[mailto:[email protected]] On Behalf Of Alex Naslednikov
Sent: Wednesday, June 09, 2010 2:38 AM
To: Fab Tillier
Cc: [email protected]; Uri Habusha
Subject: Re: [ofw] [Patch][Core] Fix at IRP cancellation mechanism

Hi Fab,
Looks excellent !


________________________________
From: Fab Tillier [mailto:[email protected]]
Sent: Tuesday, June 08, 2010 6:34 PM
To: Alex Naslednikov; [email protected]; Leonid Keller; Tzachi Dar
Cc: Uri Habusha
Subject: RE: [ofw] [Patch][Core] Fix at IRP cancellation mechanism

Hi Alex,

Why not move the call to IoSetCancelRoutine up in the function, and check the 
return value?  If the return is NULL, then someone else already cancelled the 
IRP.  If non-NULL, then the routine cancels it.  This also will close any 
potential race with completing the IRP (where the cancel routine should also be 
cleared.)

IoSetCancelRoutine is atomic, so should provide the correct level of 
synchronization.

Thoughts?
-Fab

From: [email protected] 
[mailto:[email protected]] On Behalf Of Alex Naslednikov
Sent: Tuesday, June 08, 2010 12:56 AM
To: [email protected]; Leonid Keller; Tzachi Dar
Cc: Uri Habusha
Subject: [ofw] [Patch][Core] Fix at IRP cancellation mechanism

This patch fixes the race (and BSOD) within IRP cancellation mechanism.
The original problem:
1.Function al_dev_cancel_ioctl() called from the 2 places:
directly from IBAL and by OS (as a callback)
2. This is because I/O request can be cancelled both by user-level mechanism and
by OS.
3. Function al_dev_cancel_ioctl() calls to IoGetCurrentIrpStackLocation() in 
order
to receive context and take a spinlock (p_context->cb_lock)
4. But IoGetCurrentIrpStackLocation() as well further access to stack pointer
should be protected !!!
5. BSOD happened when context or IRP itself were already cancelled by concurrent
calls to al_dev_cancel_ioctl()

Signed-off by: Alexander Naslednikov (xalex at mellanox.co.il)
Index: B:/users/xalex/2_1_0/core/al/kernel/al_dev.c
===================================================================
--- B:/users/xalex/2_1_0/core/al/kernel/al_dev.c (revision 5929)
+++ B:/users/xalex/2_1_0/core/al/kernel/al_dev.c (revision 5930)
@@ -65,9 +65,16 @@
 __proxy_cancel_cblists(
  IN  al_dev_open_context_t   *p_context );

+CL_INLINE void
+al_dev_cancel_ioctl_unlocked(
+ IN     al_dev_open_context_t *p_context,
+ IN OUT    cl_ioctl_handle_t  *p_ioctl );
 static void
 __construct_open_context(
  IN  al_dev_open_context_t   *p_context )
@@ -343,12 +350,19 @@
  p_context->closing = TRUE;

  /* Flush any pending IOCTLs in case user-mode threads died on us. */
+ /* Protect IOCTL cancellation by spinlock to avoid race
+ */
+
+ cl_spinlock_acquire( &p_context->cb_lock );
+
  if( p_context->h_cm_ioctl )
-  al_dev_cancel_ioctl( p_context->h_cm_ioctl );
- if( p_context->h_comp_ioctl )
-  al_dev_cancel_ioctl( p_context->h_comp_ioctl );
+  al_dev_cancel_ioctl_unlocked( p_context, &p_context->h_cm_ioctl );
+ if( p_context->h_comp_ioctl )
+  al_dev_cancel_ioctl_unlocked( p_context, &p_context->h_comp_ioctl );
  if( p_context->h_misc_ioctl )
-  al_dev_cancel_ioctl( p_context->h_misc_ioctl );
+  al_dev_cancel_ioctl_unlocked( p_context, &p_context->h_misc_ioctl );
+
+ cl_spinlock_release ( &p_context->cb_lock );

  while( p_context->ref_cnt )
  {
@@ -514,20 +528,31 @@

  /* Get the stack location. */
  p_io_stack = IoGetCurrentIrpStackLocation( h_ioctl );
+ if (!p_io_stack) {
+  AL_PRINT( TRACE_LEVEL_WARNING, AL_DBG_DEV, ("This IOCTL was previosly 
destroyed \n") );
+  return;
+ }

  p_context = (al_dev_open_context_t *)p_io_stack->FileObject->FsContext;
- ASSERT( p_context );
-
+ if (!p_context) {
+  AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR, ("This IOCTL was previosly 
destroyed \n") );
+  return;
+ }
  /* Clear the IOCTL. */
  cl_spinlock_acquire( &p_context->cb_lock );
  switch( cl_ioctl_ctl_code( h_ioctl ) )
  {
  case UAL_GET_COMP_CB_INFO:
   ph_ioctl = &p_context->h_comp_ioctl;
+  ASSERT(ph_ioctl);
   break;
  case UAL_GET_MISC_CB_INFO:
   ph_ioctl = &p_context->h_misc_ioctl;
   break;
+ case NULL:
+  AL_PRINT( TRACE_LEVEL_WARNING, AL_DBG_DEV, ("This IOCTL was previosly 
destroyed \n") );
+  cl_spinlock_release( &p_context->cb_lock );
+  return;
  default:
   AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR, ("Invalid CB type\n") );
   ph_ioctl = NULL;
@@ -550,7 +575,33 @@
  AL_EXIT( AL_DBG_DEV );
 }

+/*
+ * Cancel any pending IOCTL calls for the specified type.
+ * This routine is also called when closing the device.
+ * The lock should already be taken by the caller
+ */
+CL_INLINE void
+al_dev_cancel_ioctl_unlocked(
+ IN     al_dev_open_context_t *p_context,
+ IN OUT    cl_ioctl_handle_t  *p_ioctl )
+{
+
+ AL_ENTER( AL_DBG_DEV );

+#pragma warning(push, 3)
+ IoSetCancelRoutine( *p_ioctl, NULL );
+#pragma warning(pop)
+
+ /* Complete the IOCTL. */
+ cl_ioctl_complete( *p_ioctl, CL_CANCELED, 0 );
+ proxy_context_deref( p_context );
+ *p_ioctl = NULL;
+
+ AL_EXIT( AL_DBG_DEV );
+}
+
+
+
 void
 al_dev_cancel_io(
  IN    DEVICE_OBJECT    *p_dev_obj,
Index: B:/users/xalex/2_1_0/inc/kernel/complib/cl_ioctl_osd.h
===================================================================
--- B:/users/xalex/2_1_0/inc/kernel/complib/cl_ioctl_osd.h (revision 5929)
+++ B:/users/xalex/2_1_0/inc/kernel/complib/cl_ioctl_osd.h (revision 5930)
@@ -49,7 +49,9 @@
 #define IOCTL_CODE( type, cmd ) \
  CTL_CODE( type, (cmd & 0x0FFF), METHOD_BUFFERED, FILE_ANY_ACCESS)

+#define BAD_IRP_STACK 0

+
 typedef PIRP cl_ioctl_handle_t;


@@ -101,6 +103,10 @@
  IO_STACK_LOCATION *p_io_stack;

  p_io_stack = IoGetCurrentIrpStackLocation( h_ioctl );
+ if (!p_io_stack) {
+  /* "This IOCTL was previosly destroyed ! */
+  return BAD_IRP_STACK ;
+ }
  return p_io_stack->Parameters.DeviceIoControl.IoControlCode;
 }

_______________________________________________
ofw mailing list
[email protected]
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ofw

Reply via email to