When IP based addressing was introduced, ib_create_ah_from_wc was
changed in order to support a suitable AH. Since this AH should
now contains the DMAC (which isn't a simple derivative of the GID).
In order to find the DMAC, an ARP should sometime be sent. This ARP
is a sleeping context.

ib_create_ah_from_wc is called from cm_alloc_response_msg, which is
sometimes called from an atomic context. This caused a
sleeping-while-atomic bug. Fixing this by splitting
cm_alloc_response_msg to an atomic and sleep-able part. When
cm_alloc_response_msg is used in an atomic context, we try to create
the AH before entering the atomic context.

Fixes: 66bd20a72d2f ('IB/core: Ethernet L2 attributes in verbs/cm structures')
Signed-off-by: Matan Barak <[email protected]>
---

Hi Doug,
This patch fixes an old bug in the CM. IP based addressing requires
ARP resolution which isn't sleep-able by its nature. This resolution
was sometimes done in non sleep-able context. Our regression tests
picked up this bug and verified this fix.

Thanks,
Matan

 drivers/infiniband/core/cm.c | 60 ++++++++++++++++++++++++++++++++++++--------
 1 file changed, 49 insertions(+), 11 deletions(-)

diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index ea4db9c..f5cf1c4 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -287,17 +287,12 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
        return 0;
 }
 
-static int cm_alloc_response_msg(struct cm_port *port,
-                                struct ib_mad_recv_wc *mad_recv_wc,
-                                struct ib_mad_send_buf **msg)
+static int _cm_alloc_response_msg(struct cm_port *port,
+                                 struct ib_mad_recv_wc *mad_recv_wc,
+                                 struct ib_ah *ah,
+                                 struct ib_mad_send_buf **msg)
 {
        struct ib_mad_send_buf *m;
-       struct ib_ah *ah;
-
-       ah = ib_create_ah_from_wc(port->mad_agent->qp->pd, mad_recv_wc->wc,
-                                 mad_recv_wc->recv_buf.grh, port->port_num);
-       if (IS_ERR(ah))
-               return PTR_ERR(ah);
 
        m = ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index,
                               0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
@@ -312,6 +307,20 @@ static int cm_alloc_response_msg(struct cm_port *port,
        return 0;
 }
 
+static int cm_alloc_response_msg(struct cm_port *port,
+                                struct ib_mad_recv_wc *mad_recv_wc,
+                                struct ib_mad_send_buf **msg)
+{
+       struct ib_ah *ah;
+
+       ah = ib_create_ah_from_wc(port->mad_agent->qp->pd, mad_recv_wc->wc,
+                                 mad_recv_wc->recv_buf.grh, port->port_num);
+       if (IS_ERR(ah))
+               return PTR_ERR(ah);
+
+       return _cm_alloc_response_msg(port, mad_recv_wc, ah, msg);
+}
+
 static void cm_free_msg(struct ib_mad_send_buf *msg)
 {
        ib_destroy_ah(msg->ah);
@@ -2201,6 +2210,7 @@ static int cm_dreq_handler(struct cm_work *work)
        struct cm_id_private *cm_id_priv;
        struct cm_dreq_msg *dreq_msg;
        struct ib_mad_send_buf *msg = NULL;
+       struct ib_ah *ah;
        int ret;
 
        dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -2213,6 +2223,11 @@ static int cm_dreq_handler(struct cm_work *work)
                return -EINVAL;
        }
 
+       ah = ib_create_ah_from_wc(work->port->mad_agent->qp->pd,
+                                 work->mad_recv_wc->wc,
+                                 work->mad_recv_wc->recv_buf.grh,
+                                 work->port->port_num);
+
        work->cm_event.private_data = &dreq_msg->private_data;
 
        spin_lock_irq(&cm_id_priv->lock);
@@ -2234,9 +2249,13 @@ static int cm_dreq_handler(struct cm_work *work)
        case IB_CM_TIMEWAIT:
                atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
                                counter[CM_DREQ_COUNTER]);
-               if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg))
+               if (IS_ERR(ah))
+                       goto unlock;
+               if (_cm_alloc_response_msg(work->port, work->mad_recv_wc, ah,
+                                          &msg))
                        goto unlock;
 
+               ah = NULL;
                cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
                               cm_id_priv->private_data,
                               cm_id_priv->private_data_len);
@@ -2259,6 +2278,8 @@ static int cm_dreq_handler(struct cm_work *work)
                list_add_tail(&work->list, &cm_id_priv->work_list);
        spin_unlock_irq(&cm_id_priv->lock);
 
+       if (!IS_ERR_OR_NULL(ah))
+               ib_destroy_ah(ah);
        if (ret)
                cm_process_work(cm_id_priv, work);
        else
@@ -2266,6 +2287,8 @@ static int cm_dreq_handler(struct cm_work *work)
        return 0;
 
 unlock:        spin_unlock_irq(&cm_id_priv->lock);
+       if (!IS_ERR_OR_NULL(ah))
+               ib_destroy_ah(ah);
 deref: cm_deref_id(cm_id_priv);
        return -EINVAL;
 }
@@ -2761,6 +2784,7 @@ static int cm_lap_handler(struct cm_work *work)
        struct cm_lap_msg *lap_msg;
        struct ib_cm_lap_event_param *param;
        struct ib_mad_send_buf *msg = NULL;
+       struct ib_ah *ah;
        int ret;
 
        /* todo: verify LAP request and send reject APR if invalid. */
@@ -2770,6 +2794,12 @@ static int cm_lap_handler(struct cm_work *work)
        if (!cm_id_priv)
                return -EINVAL;
 
+
+       ah = ib_create_ah_from_wc(work->port->mad_agent->qp->pd,
+                                 work->mad_recv_wc->wc,
+                                 work->mad_recv_wc->recv_buf.grh,
+                                 work->port->port_num);
+
        param = &work->cm_event.param.lap_rcvd;
        param->alternate_path = &work->path[0];
        cm_format_path_from_lap(cm_id_priv, param->alternate_path, lap_msg);
@@ -2786,9 +2816,13 @@ static int cm_lap_handler(struct cm_work *work)
        case IB_CM_MRA_LAP_SENT:
                atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
                                counter[CM_LAP_COUNTER]);
-               if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg))
+               if (IS_ERR(ah))
+                       goto unlock;
+               if (_cm_alloc_response_msg(work->port, work->mad_recv_wc, ah,
+                                          &msg))
                        goto unlock;
 
+               ah = NULL;
                cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
                              CM_MSG_RESPONSE_OTHER,
                              cm_id_priv->service_timeout,
@@ -2818,6 +2852,8 @@ static int cm_lap_handler(struct cm_work *work)
                list_add_tail(&work->list, &cm_id_priv->work_list);
        spin_unlock_irq(&cm_id_priv->lock);
 
+       if (!IS_ERR_OR_NULL(ah))
+               ib_destroy_ah(ah);
        if (ret)
                cm_process_work(cm_id_priv, work);
        else
@@ -2825,6 +2861,8 @@ static int cm_lap_handler(struct cm_work *work)
        return 0;
 
 unlock:        spin_unlock_irq(&cm_id_priv->lock);
+       if (!IS_ERR_OR_NULL(ah))
+               ib_destroy_ah(ah);
 deref: cm_deref_id(cm_id_priv);
        return -EINVAL;
 }
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to