Fix locking bug in mcast mgr.

When a device is added, the lock is held very early on, and is not released in 
case of errors.  This patch reduces the scope of the lock to only the insertion 
into the mcast manager's port list.

I also added comments and moved code around a bit for the callback invocations, 
where the lock was being released without having been acquired in that 
function, just to make things clearer.

Signed-off-by: Fab Tillier <[email protected]>

diff -dwup3 -X excl.txt -r 
\dev\openib\ofw\gen1\branches\mlx4_30\trunk\core\al\kernel\al_mcast_mgr.c 
.\core\al\kernel\al_mcast_mgr.c
--- \dev\openib\ofw\gen1\branches\mlx4_30\trunk\core\al\kernel\al_mcast_mgr.c   
Thu May 31 11:22:16 2012
+++ .\core\al\kernel\al_mcast_mgr.c     Fri Aug 10 00:24:34 2012
@@ -439,8 +439,6 @@ mcast_mgr_add_ca_ports(
                return IB_NOT_FOUND;
        }
 
-       cl_spinlock_acquire( &g_mcast_mgr_db.mgr_list_lock );
-
        for ( i = 0; i < p_ci_ca->num_ports; i++)
        {
                mcast_mgr_t*                    p_mcast_mgr;
@@ -475,13 +473,13 @@ mcast_mgr_add_ca_ports(
 
                p_mcast_mgr->close_pending = FALSE;
 
+        cl_spinlock_acquire( &g_mcast_mgr_db.mgr_list_lock );
                InsertTailList(&g_mcast_mgr_db.mgr_list, 
&p_port_info->port_info_entry);
+        cl_spinlock_release( &g_mcast_mgr_db.mgr_list_lock );
 
                AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_ERROR, ("Add CA port 
0x%I64x\n", p_port_info->port_guid) );
        }
 
-       cl_spinlock_release( &g_mcast_mgr_db.mgr_list_lock );
-
        return IB_SUCCESS;
 
 error:
@@ -570,6 +568,9 @@ exit:
 }
 
 
+//
+// This function is called with the global mcast_mgr lock held.
+//
 ib_api_status_t
 mcast_mgr_process_next_request(
        IN              mcast_mgr_t*            p_mcast_mgr,
@@ -608,13 +609,18 @@ mcast_mgr_process_next_request(
                                                
mcast_mgr_print_node_msg(TRACE_LEVEL_ERROR, p_mcast_request, " running join 
request failed - inform user by callback");
                                                
                                                p_mcast_request->req_state = 
RUNNING_CB;
+                        //
+                        // Release the lock before invoking the callback.  
Note that we remove the
+                        // request from the list before invoking the callback, 
so as to allow the
+                        // list to change during the callback without worry.
+                        //
+                                               
RemoveEntryList(&p_mcast_request->request_entry);
                                                
cl_spinlock_release(&p_mcast_mgr->mgr_lock);
                                                mcast_rec.mcast_context = 
p_join_req->mcast_req.mcast_context;
                                                mcast_rec.status = status;
                                                
p_mcast_request->p_join_req->mcast_req.pfn_mcast_cb(&mcast_rec);
                                                
cl_spinlock_acquire(&p_mcast_mgr->mgr_lock);
                                                
-                                               
RemoveEntryList(&p_mcast_request->request_entry);
                                                cl_event_signal( 
&p_mcast_request->cb_finished );
                                                p_mcast_request->req_state = 
DONE;
                                                p_mcast_node->h_mcast = NULL;
@@ -645,12 +651,17 @@ mcast_mgr_process_next_request(
                                        p_mcast_node->connections++;
 
                                        p_mcast_request->req_state = RUNNING_CB;
+                    //
+                    // Release the lock before invoking the callback.  Note 
that we remove the
+                    // request from the list before invoking the callback, so 
as to allow the
+                    // list to change during the callback without worry.
+                    //
+                                       
RemoveEntryList(&p_mcast_request->request_entry);
                                        
cl_spinlock_release(&p_mcast_mgr->mgr_lock);
                                        mcast_rec.mcast_context = 
p_mcast_request->p_join_req->mcast_req.mcast_context;
                                        
p_mcast_request->p_join_req->mcast_req.pfn_mcast_cb(&mcast_rec);
                                        
cl_spinlock_acquire(&p_mcast_mgr->mgr_lock);
 
-                                       
RemoveEntryList(&p_mcast_request->request_entry);
                                        
InsertTailList(&p_mcast_node->connected_req_list, 
&p_mcast_request->request_entry);
                                        p_mcast_request->req_state = DONE;
                                        cl_event_signal( 
&p_mcast_request->cb_finished );
@@ -679,15 +690,19 @@ mcast_mgr_process_next_request(
                                                p_mcast_node->connections--;
 
                                                p_mcast_request->req_state = 
RUNNING_CB;
+                        //
+                        // Release the lock before invoking the callback.  
Note that we remove the
+                        // request from the list before invoking the callback, 
so as to allow the
+                        // list to change during the callback without worry.
+                        //
+                                               
RemoveEntryList(&p_mcast_request->request_entry);
+                                               
RemoveEntryList(&p_join_request->request_entry);
                                                
cl_spinlock_release(&p_mcast_mgr->mgr_lock);
                                                user_context = 
(void*)p_join_request->p_join_req->mcast_req.mcast_context;
                                                
p_join_request->req_dereg_status = IB_SUCCESS;
                                                if 
(p_mcast_request->p_leave_req->leave_cb)
                                                        
p_mcast_request->p_leave_req->leave_cb(user_context);
                                                
cl_spinlock_acquire(&p_mcast_mgr->mgr_lock);
-                                               
-                                               
RemoveEntryList(&p_mcast_request->request_entry);
-                                               
RemoveEntryList(&p_join_request->request_entry);
 
                                                deref_request(p_join_request);
                                                deref_request(p_mcast_request);

Attachment: ndv2.40.patch
Description: ndv2.40.patch

_______________________________________________
ofw mailing list
[email protected]
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ofw

Reply via email to