>Jun 20 09:13:50 localhost kernel: dapl_cm_active_cb_handler 138 event = 9
>Jun 20 09:13:50 localhost kernel: dapl_evd_connection_callback 760 event=16389
>Jun 20 09:13:50 localhost kernel: dapl_evd_connection_callback 798 ep = 
>c0a17bf8
>Jun 20 09:13:50 localhost kernel: dapl_ib_disconnect_clean 579 ep=c0a17bf8
>Jun 20 09:13:50 localhost kernel: dapl_ib_disconnect 538 ep=c0a17bf8 flags=0
>Jun 20 09:13:50 localhost kernel: dapl_destroy_cm_id 63
>Jun 20 09:13:50 localhost kernel: dapl_modify_qp_state_to_error 300
>Jun 20 09:13:50 localhost kernel: 
>drivers/infiniband/ulp/dat-provider/dapl_ep.c:1111: 
>spin_lock(drivers/infiniband/ulp/dat-provider/dapl_ep.c:c0a17c08) already 
>locked by drivers/infiniband/ulp/dat-provider/dapl_evd.c/759
>Jun 20 09:13:51 localhost kernel: dapl_modify_qp_state_to_error 305
>Jun 20 09:13:51 localhost kernel: dapl_evd_connection_callback 800 ep = 
>c0a17bf8
>Jun 20 09:13:51 localhost kernel: 
>drivers/infiniband/ulp/dat-provider/dapl_evd.c:802: 
>spin_unlock(drivers/infiniband/ulp/dat-provider/dapl_ep.c:c0a17c08) not locked
>Jun 20 09:13:51 localhost kernel: dapl_cr_callback 434 event=16389
>Jun 20 09:13:51 localhost kernel: dapl_cr_callback 512 ep = c9821bf8
>Jun 20 09:13:51 localhost kernel: dapl_ib_disconnect_clean 579 ep=c982m: DREQ 
>rcvd

Hi Hal,
I think I have found the problem. It is not legal to call ib_modify_qp()
while holding a spinlock, because this function can sleep.

Please try the patch below and, if it works, please ask James to check it in.

Fix locking problem in CM callback functions

Signed-off-by: Itamar Rabenstein <[EMAIL PROTECTED]>

Index: dapl_openib_util.h
===================================================================
--- dapl_openib_util.h  (revision 2665)
+++ dapl_openib_util.h  (working copy)
@@ -125,7 +125,7 @@
 
 void dapl_ib_reinit_ep(struct dapl_ep *ep);
 
-void dapl_ib_disconnect_clean(struct dapl_ep *ep, boolean_t passive);
+void dapl_ib_disconnect_clean(struct dapl_ep *ep);
 
 u32 dapl_ib_get_async_event(struct ib_event *cause,
                            enum dat_event_number *async_event);
Index: dapl_cr.c
===================================================================
--- dapl_cr.c   (revision 2665)
+++ dapl_cr.c   (working copy)
@@ -479,8 +479,7 @@
                        /* If someone pulled the plug on the EP or connection,
                         * just exit
                         */
-                       spin_unlock_irqrestore(&ep->common.lock, 
-                                              ep->common.flags);
+                       spin_unlock_irqrestore(&ep->common.lock, 
ep->common.flags);
                        status = DAT_SUCCESS;
                        /* Set evd = NULL so we don't generate an event below */
                        evd = NULL;
@@ -504,36 +503,23 @@
                        /* The disconnect has already occurred, we are now
                         * cleaned up and ready to exit
                         */
-                       spin_unlock_irqrestore(&ep->common.lock, 
-                                              ep->common.flags);
+                       spin_unlock_irqrestore(&ep->common.lock, 
ep->common.flags);
                        return;
                }
                ep->param.ep_state = DAT_EP_STATE_DISCONNECTED;
-               dapl_ib_disconnect_clean(ep, FALSE);
                spin_unlock_irqrestore(&ep->common.lock, ep->common.flags);
+               dapl_ib_disconnect_clean(ep);
 
                break;
        case DAT_CONNECTION_EVENT_NON_PEER_REJECTED:
        case DAT_CONNECTION_EVENT_PEER_REJECTED:
        case DAT_CONNECTION_EVENT_UNREACHABLE:
-               /*
-                * After posting an accept the requesting node has
-                * stopped talking.
-                */
+       case DAT_CONNECTION_EVENT_BROKEN:
                spin_lock_irqsave(&ep->common.lock, ep->common.flags);
                ep->param.ep_state = DAT_EP_STATE_DISCONNECTED;
-               dapl_ib_disconnect_clean(ep, FALSE);
                spin_unlock_irqrestore(&ep->common.lock, ep->common.flags);
-
+               dapl_ib_disconnect_clean(ep);
                break;
-       case DAT_CONNECTION_EVENT_BROKEN:
-               spin_lock_irqsave(&ep->common.lock, ep->common.flags);
-               ep->param.ep_state = DAT_EP_STATE_DISCONNECTED;
-               dapl_ib_disconnect_clean(ep, FALSE);
-               spin_unlock_irqrestore(&ep->common.lock,
-                                      ep->common.flags);
-
-               break;
        default:
                evd = NULL;
                dapl_os_assert(0);      /* shouldn't happen */
Index: dapl_evd.c
===================================================================
--- dapl_evd.c  (revision 2665)
+++ dapl_evd.c  (working copy)
@@ -760,7 +760,6 @@
 
        switch (event) {
        case DAT_CONNECTION_EVENT_ESTABLISHED:
-       {
                /* 
                 * If we don't have an EP at this point we are very screwed up 
                 */
@@ -784,65 +783,28 @@
                                      private_data_size);
                }
                spin_unlock_irqrestore(&ep->common.lock, ep->common.flags);
-
                break;
-       }
        case DAT_CONNECTION_EVENT_DISCONNECTED:
-       {
-               /*
-                * EP is now fully disconnected; initiate any post processing
-                * to reset the underlying QP and get the EP ready for
-                * another connection
-                */
                ep->param.ep_state = DAT_EP_STATE_DISCONNECTED;
-               dapl_ib_disconnect_clean(ep, TRUE);
-               spin_unlock_irqrestore(&ep->common.lock,
-                                      ep->common.flags);
-
+               spin_unlock_irqrestore(&ep->common.lock,ep->common.flags);
+               dapl_ib_disconnect_clean(ep);
                break;
-       }
        case DAT_CONNECTION_EVENT_PEER_REJECTED:
-       {
-               ep->param.ep_state = DAT_EP_STATE_DISCONNECTED;
-               dapl_ib_disconnect_clean(ep, TRUE);
-               spin_unlock_irqrestore(&ep->common.lock, ep->common.flags);
-
-               break;
-       }
        case DAT_CONNECTION_EVENT_UNREACHABLE:
-       {
-               ep->param.ep_state = DAT_EP_STATE_DISCONNECTED;
-               dapl_ib_disconnect_clean(ep, TRUE);
-               spin_unlock_irqrestore(&ep->common.lock, ep->common.flags);
-
-               break;
-       }
        case DAT_CONNECTION_EVENT_NON_PEER_REJECTED:
-       {
-               ep->param.ep_state = DAT_EP_STATE_DISCONNECTED;
-               dapl_ib_disconnect_clean(ep, TRUE);
-               spin_unlock_irqrestore(&ep->common.lock, ep->common.flags);
-
-               break;
-       }
        case DAT_CONNECTION_EVENT_BROKEN:
-       {
                ep->param.ep_state = DAT_EP_STATE_DISCONNECTED;
-               dapl_ib_disconnect_clean(ep, FALSE);
                spin_unlock_irqrestore(&ep->common.lock, ep->common.flags);
-
+               dapl_ib_disconnect_clean(ep);
                break;
-       }
        case DAT_CONNECTION_REQUEST_EVENT:
        default:
-       {
                spin_unlock_irqrestore(&ep->common.lock, ep->common.flags);
                evd = NULL;
 
                dapl_os_assert(0);      /* shouldn't happen */
                break;
        }
-       }
 
        /*
         * Post the event
Index: dapl_openib_cm.c
===================================================================
--- dapl_openib_cm.c    (revision 2665)
+++ dapl_openib_cm.c    (working copy)
@@ -562,13 +562,12 @@
  *      void
  *
  */
-void dapl_ib_disconnect_clean(struct dapl_ep *ep_ptr, boolean_t active)
+void dapl_ib_disconnect_clean(struct dapl_ep *ep_ptr)
 {
        int status;
 
        dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                    "  >>> dapl_ib_disconnect_clean: EP: %p active %d\n",
-                    ep_ptr, active);
+                    "  >>> dapl_ib_disconnect_clean: EP: %p \n", ep_ptr);
 
        /*
         * Clean up outstanding connection state
-- 
Itamar
_______________________________________________
openib-general mailing list
[email protected]
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to