> Quoting Roland Dreier <[EMAIL PROTECTED]>:
> Subject: Re: [PATCH RFC/untested v0.5] IPoIB/CM: fix SRQ WR leak
> 
>  > + * - Put the QP in the Error State
>  > + * - Wait for the Affiliated Asynchronous Last WQE Reached Event;
>  > + * - either:
>  > + *       drain the CQ by invoking the Poll CQ verb and either wait for CQ
>  > + *       to be empty or the number of Poll CQ operations has exceeded
>  > + *       CQ capacity size;
>  > + * - or
>  > + *       post another WR that completes on the same CQ and wait for this
>  > + *       WR to return as a WC; (NB: this is the option that we use)
>  > + * and then invoke a Destroy QP or Reset QP.
> 
> I guess this last line would look better as
> 
>  * - invoke a Destroy QP or Reset QP.

Hmm, I would like to quote the spec *literally*. Maybe
- and then invoke a Destroy QP or Reset QP.

>  > +static struct ib_qp_attr ipoib_cm_err_attr __read_mostly = {
>  > +  .qp_state = IB_QPS_ERR
>  > +};
>  > +
>  > +#define IPOIB_CM_RX_DRAIN_WRID 0x7fffffff
>  > +
>  > +static struct ib_send_wr ipoib_cm_rx_drain_wr __read_mostly = {
>  > +  .wr_id = IPOIB_CM_RX_DRAIN_WRID,
>  > +  .opcode = IB_WR_SEND
>  > +};
> 
> I don't think these are hot enough to be worth marking as __read_mostly.
> (better to leave them in normal .data so that stuff that is written to
> ends up getting spaced out more)

OK, thanks for the suggestion.

>  > +  qp_attr.qp_state = IB_QPS_INIT;
>  > +  qp_attr.port_num = priv->port;
>  > +  qp_attr.qkey = 0;
>  > +  qp_attr.qp_access_flags = 0;
>  > +  ret = ib_modify_qp(priv->cm.rx_drain_qp, &qp_attr,
>  > +                     IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PORT | 
> IB_QP_QKEY);
>  > +  if (ret) {
>  > +          ipoib_warn(priv, "failed to modify drain QP to INIT: %d\n", 
> ret);
>  > +          goto err_qp;
>  > +  }
>  > +
>  > +  /* We put the QP in error state directly: this way, hardware
>  > +   * will immediately generate WC for each WR we post, without
>  > +   * sending anything on the wire. */
>  > +  qp_attr.qp_state = IB_QPS_ERR;
>  > +  ret = ib_modify_qp(priv->cm.rx_drain_qp, &qp_attr, IB_QP_STATE);
>  > +  if (ret) {
>  > +          ipoib_warn(priv, "failed to modify drain QP to error: %d\n", 
> ret);
>  > +          goto err_qp;
>  > +  }
> 
> This actually seems like a good motivation for the mthca RESET ->
> ERROR fix.  We could avoid the transition to INIT if we fixed mthca
> and mlx4, right?

Yes. That was the motivation.

> (By the way, any interest in making an mlx4 patch to
> fix that too?)

Easy (I also fixed reset to reset on the way).

IB/mlx4: fix RESET -> ERROR and RESET -> RESET transitions

Signed-off-by: Michael S. Tsirkin <[EMAIL PROTECTED]>

---

diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 5cd7069..c93daab 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -573,7 +573,7 @@ static int to_mlx4_st(enum ib_qp_type type)
        }
 }
 
-static __be32 to_mlx4_access_flags(struct mlx4_ib_qp *qp, struct ib_qp_attr 
*attr,
+static __be32 to_mlx4_access_flags(struct mlx4_ib_qp *qp, const struct 
ib_qp_attr *attr,
                                   int attr_mask)
 {
        u8 dest_rd_atomic;
@@ -603,7 +603,7 @@ static __be32 to_mlx4_access_flags(struct mlx4_ib_qp *qp, 
struct ib_qp_attr *att
        return cpu_to_be32(hw_access_flags);
 }
 
-static void store_sqp_attrs(struct mlx4_ib_sqp *sqp, struct ib_qp_attr *attr,
+static void store_sqp_attrs(struct mlx4_ib_sqp *sqp, const struct ib_qp_attr 
*attr,
                            int attr_mask)
 {
        if (attr_mask & IB_QP_PKEY_INDEX)
@@ -619,7 +619,7 @@ static void mlx4_set_sched(struct mlx4_qp_path *path, u8 
port)
        path->sched_queue = (path->sched_queue & 0xbf) | ((port - 1) << 6);
 }
 
-static int mlx4_set_path(struct mlx4_ib_dev *dev, struct ib_ah_attr *ah,
+static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
                         struct mlx4_qp_path *path, u8 port)
 {
        path->grh_mylmc     = ah->src_path_bits & 0x7f;
@@ -655,14 +655,14 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, struct 
ib_ah_attr *ah,
        return 0;
 }
 
-int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-                     int attr_mask, struct ib_udata *udata)
+static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
+                              const struct ib_qp_attr *attr, int attr_mask,
+                              enum ib_qp_state cur_state, enum ib_qp_state 
new_state)
 {
        struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
        struct mlx4_ib_qp *qp = to_mqp(ibqp);
        struct mlx4_qp_context *context;
        enum mlx4_qp_optpar optpar = 0;
-       enum ib_qp_state cur_state, new_state;
        int sqd_event;
        int err = -EINVAL;
 
@@ -670,34 +670,6 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct 
ib_qp_attr *attr,
        if (!context)
                return -ENOMEM;
 
-       mutex_lock(&qp->mutex);
-
-       cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : 
qp->state;
-       new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
-
-       if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask))
-               goto out;
-
-       if ((attr_mask & IB_QP_PKEY_INDEX) &&
-            attr->pkey_index >= dev->dev->caps.pkey_table_len) {
-               goto out;
-       }
-
-       if ((attr_mask & IB_QP_PORT) &&
-           (attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) 
{
-               goto out;
-       }
-
-       if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
-           attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) {
-               goto out;
-       }
-
-       if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
-           attr->max_dest_rd_atomic > 1 << dev->dev->caps.max_qp_dest_rdma) {
-               goto out;
-       }
-
        context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) |
                                     (to_mlx4_st(ibqp->qp_type) << 16));
        context->flags     |= cpu_to_be32(1 << 8); /* DE? */
@@ -920,11 +892,83 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct 
ib_qp_attr *attr,
        }
 
 out:
-       mutex_unlock(&qp->mutex);
        kfree(context);
        return err;
 }
 
+static const struct ib_qp_attr mlx4_ib_qp_attr = { .port_num = 1 };
+static const int mlx4_ib_qp_attr_mask_table[IB_QPT_UD + 1] = {
+               [IB_QPT_UD]  = (IB_QP_PKEY_INDEX                |
+                               IB_QP_PORT                      |
+                               IB_QP_QKEY),
+               [IB_QPT_UC]  = (IB_QP_PKEY_INDEX                |
+                               IB_QP_PORT                      |
+                               IB_QP_ACCESS_FLAGS),
+               [IB_QPT_RC]  = (IB_QP_PKEY_INDEX                |
+                               IB_QP_PORT                      |
+                               IB_QP_ACCESS_FLAGS),
+               [IB_QPT_SMI] = (IB_QP_PKEY_INDEX                |
+                               IB_QP_QKEY),
+               [IB_QPT_GSI] = (IB_QP_PKEY_INDEX                |
+                               IB_QP_QKEY),
+};
+
+int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+                     int attr_mask, struct ib_udata *udata)
+{
+       struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
+       struct mlx4_ib_qp *qp = to_mqp(ibqp);
+       enum ib_qp_state cur_state, new_state;
+       int err = -EINVAL;
+
+       mutex_lock(&qp->mutex);
+
+       cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : 
qp->state;
+       new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
+
+       if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask))
+               goto out;
+
+       if ((attr_mask & IB_QP_PKEY_INDEX) &&
+            attr->pkey_index >= dev->dev->caps.pkey_table_len) {
+               goto out;
+       }
+
+       if ((attr_mask & IB_QP_PORT) &&
+           (attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) 
{
+               goto out;
+       }
+
+       if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
+           attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) {
+               goto out;
+       }
+
+       if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
+           attr->max_dest_rd_atomic > 1 << dev->dev->caps.max_qp_dest_rdma) {
+               goto out;
+       }
+
+       if (cur_state == new_state && cur_state == IB_QPS_RESET) {
+               err = 0;
+               goto out;
+       }
+
+       if (cur_state == IB_QPS_RESET && new_state == IB_QPS_ERR) {
+               err = __mlx4_ib_modify_qp(ibqp, &mlx4_ib_qp_attr,
+                                         
mlx4_ib_qp_attr_mask_table[ibqp->qp_type],
+                                         IB_QPS_RESET, IB_QPS_INIT);
+               if (err)
+                       goto out;
+               cur_state = IB_QPS_INIT;
+       }
+ 
+       err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
+out:
+       mutex_unlock(&qp->mutex);
+       return err;
+}
+
 static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
                            void *wqe)
 {

-- 
MST
_______________________________________________
general mailing list
[email protected]
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to