Hi,

Le mardi 14 janvier 2014 à 17:45 +0200, Eli Cohen a écrit :
> Implement resize CQ which is a mandatory verb in mlx5.
> 
> Signed-off-by: Eli Cohen <[email protected]>
> 
> Conflicts:
>       include/linux/mlx5/device.h
> ---
>  drivers/infiniband/hw/mlx5/cq.c              | 282 
> +++++++++++++++++++++++++--
>  drivers/infiniband/hw/mlx5/mlx5_ib.h         |   3 +-
>  drivers/infiniband/hw/mlx5/user.h            |   3 +
>  drivers/net/ethernet/mellanox/mlx5/core/cq.c |   4 +-
>  include/linux/mlx5/cq.h                      |  12 +-
>  include/linux/mlx5/device.h                  |   2 +
>  6 files changed, 284 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
> index b4c122e..50b03a8 100644
> --- a/drivers/infiniband/hw/mlx5/cq.c
> +++ b/drivers/infiniband/hw/mlx5/cq.c
> @@ -73,14 +73,24 @@ static void *get_cqe(struct mlx5_ib_cq *cq, int n)
>       return get_cqe_from_buf(&cq->buf, n, cq->mcq.cqe_sz);
>  }
>  
> +static u8 sw_ownership_bit(int n, int nent)
> +{
> +     return (n & nent) ? 1 : 0;
> +}
> +
>  static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n)
>  {
>       void *cqe = get_cqe(cq, n & cq->ibcq.cqe);
>       struct mlx5_cqe64 *cqe64;
>  
>       cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
> -     return ((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^
> -             !!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe;
> +
> +     if (likely((cqe64->op_own) >> 4 != MLX5_CQE_INVALID) &&
> +         !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ibcq.cqe + 
> 1)))) {
> +             return cqe;
> +     } else {
> +             return NULL;
> +     }
>  }
>  
>  static void *next_cqe_sw(struct mlx5_ib_cq *cq)
> @@ -351,6 +361,11 @@ static void handle_atomics(struct mlx5_ib_qp *qp, struct 
> mlx5_cqe64 *cqe64,
>       qp->sq.last_poll = tail;
>  }
>  
> +static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
> +{
> +     mlx5_buf_free(&dev->mdev, &buf->buf);
> +}
> +
>  static int mlx5_poll_one(struct mlx5_ib_cq *cq,
>                        struct mlx5_ib_qp **cur_qp,
>                        struct ib_wc *wc)
> @@ -366,6 +381,7 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
>       void *cqe;
>       int idx;
>  
> +repoll:
>       cqe = next_cqe_sw(cq);
>       if (!cqe)
>               return -EAGAIN;
> @@ -379,7 +395,18 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
>        */
>       rmb();
>  
> -     /* TBD: resize CQ */
> +     opcode = cqe64->op_own >> 4;
> +     if (unlikely(opcode == MLX5_CQE_RESIZE_CQ)) {
> +             if (likely(cq->resize_buf)) {
> +                     free_cq_buf(dev, &cq->buf);
> +                     cq->buf = *cq->resize_buf;
> +                     kfree(cq->resize_buf);
> +                     cq->resize_buf = NULL;
> +                     goto repoll;
> +             } else {
> +                     mlx5_ib_warn(dev, "unexpected resize cqe\n");
> +             }
> +     }
>  
>       qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff;
>       if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) {
> @@ -398,7 +425,6 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
>       }
>  
>       wc->qp  = &(*cur_qp)->ibqp;
> -     opcode = cqe64->op_own >> 4;
>       switch (opcode) {
>       case MLX5_CQE_REQ:
>               wq = &(*cur_qp)->sq;
> @@ -503,15 +529,11 @@ static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct 
> mlx5_ib_cq_buf *buf,
>               return err;
>  
>       buf->cqe_size = cqe_size;
> +     buf->nent = nent;
>  
>       return 0;
>  }
>  
> -static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
> -{
> -     mlx5_buf_free(&dev->mdev, &buf->buf);
> -}
> -
>  static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
>                         struct ib_ucontext *context, struct mlx5_ib_cq *cq,
>                         int entries, struct mlx5_create_cq_mbox_in **cqb,
> @@ -576,16 +598,16 @@ static void destroy_cq_user(struct mlx5_ib_cq *cq, 
> struct ib_ucontext *context)
>       ib_umem_release(cq->buf.umem);
>  }
>  
> -static void init_cq_buf(struct mlx5_ib_cq *cq, int nent)
> +static void init_cq_buf(struct mlx5_ib_cq *cq, struct mlx5_ib_cq_buf *buf)
>  {
>       int i;
>       void *cqe;
>       struct mlx5_cqe64 *cqe64;
>  
> -     for (i = 0; i < nent; i++) {
> -             cqe = get_cqe(cq, i);
> -             cqe64 = (cq->buf.cqe_size == 64) ? cqe : cqe + 64;
> -             cqe64->op_own = 0xf1;
> +     for (i = 0; i < buf->nent; i++) {
> +             cqe = get_cqe_from_buf(buf, i, buf->cqe_size);
> +             cqe64 = buf->cqe_size == 64 ? cqe : cqe + 64;
> +             cqe64->op_own = MLX5_CQE_INVALID << 4;
>       }
>  }
>  
> @@ -610,7 +632,7 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, 
> struct mlx5_ib_cq *cq,
>       if (err)
>               goto err_db;
>  
> -     init_cq_buf(cq, entries);
> +     init_cq_buf(cq, &cq->buf);
>  
>       *inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * cq->buf.buf.npages;
>       *cqb = mlx5_vzalloc(*inlen);
> @@ -836,7 +858,7 @@ int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 
> cq_period)
>       in->ctx.cq_period = cpu_to_be16(cq_period);
>       in->ctx.cq_max_count = cpu_to_be16(cq_count);
>       in->field_select = cpu_to_be32(fsel);
> -     err = mlx5_core_modify_cq(&dev->mdev, &mcq->mcq, in);
> +     err = mlx5_core_modify_cq(&dev->mdev, &mcq->mcq, in, sizeof(*in));
>       kfree(in);
>  
>       if (err)
> @@ -845,9 +867,235 @@ int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, 
> u16 cq_period)
>       return err;
>  }
>  
> +static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
> +                    int entries, struct ib_udata *udata, int *npas,
> +                    int *page_shift, int *cqe_size)
> +{
> +     struct mlx5_ib_resize_cq ucmd;
> +     struct ib_umem *umem;
> +     int err;
> +     int npages;
> +     struct ib_ucontext *context = cq->buf.umem->context;
> +
> +     if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)))
> +             return -EFAULT;
> +

You might also write

         err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
         if (err)
                 return err;

Then you should check reserved fields being set to the default value:
As noted by Daniel Vetter in his article "Botching up ioctls"[1]:
  "Check *all* unused fields and flags and all the padding for whether 
   it's 0, and reject the ioctl if that's not the case. Otherwise your 
   nice plan for future extensions is going right down the gutters 
   since someone *will* submit an ioctl struct with random stack 
   garbage in the yet unused parts. Which then bakes in the ABI that 
   those fields can never be used for anything else but garbage."
It's important to ensure that reserved fields are set to a known value,
so that it will be possible to use them later to extend the ABI.

[1] http://blog.ffwll.ch/2013/11/botching-up-ioctls.html

         if (ucmd.reserved0 || ucmd.reserved1)
                 return -EINVAL;

> +     umem = ib_umem_get(context, ucmd.buf_addr, entries * ucmd.cqe_size,
> +                        IB_ACCESS_LOCAL_WRITE, 1);
> +     if (IS_ERR(umem)) {
> +             err = PTR_ERR(umem);
> +             return err;
> +     }
> +
> +     mlx5_ib_cont_pages(umem, ucmd.buf_addr, &npages, page_shift,
> +                        npas, NULL);
> +
> +     cq->resize_umem = umem;
> +     *cqe_size = ucmd.cqe_size;
> +
> +     return 0;
> +}
> +


>  int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata 
> *udata)
>  {
> -     return -ENOSYS;
> +     struct mlx5_ib_dev *dev = to_mdev(ibcq->device);
> +     struct mlx5_ib_cq *cq = to_mcq(ibcq);
> +     struct mlx5_modify_cq_mbox_in *in;
> +     int err;
> +     int npas;
> +     int page_shift;
> +     int inlen;
> +     int uninitialized_var(cqe_size);
> +     unsigned long flags;
> +
> +     if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_RESIZE_CQ)) {
> +             pr_info("Firmware does not support resize CQ\n");
> +             return -ENOSYS;
> +     }
> +
> +     if (entries < 1)
> +             return -EINVAL;
> +
> +     entries = roundup_pow_of_two(entries + 1);
> +     if (entries > dev->mdev.caps.max_cqes + 1)
> +             return -EINVAL;
> +
> +     if (entries == ibcq->cqe + 1)
> +             return 0;
> +
> +     mutex_lock(&cq->resize_mutex);
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

> +     if (udata) {
> +             err = resize_user(dev, cq, entries, udata, &npas, &page_shift,
> +                               &cqe_size);
> +     } else {
> +             cqe_size = 64;
> +             err = resize_kernel(dev, cq, entries, cqe_size);
> +             if (!err) {
> +                     npas = cq->resize_buf->buf.npages;
> +                     page_shift = cq->resize_buf->buf.page_shift;
> +             }
> +     }
> +
> +     if (err)
> +             goto ex;
> +
> +     inlen = sizeof(*in) + npas * sizeof(in->pas[0]);
> +     in = mlx5_vzalloc(inlen);
> +     if (!in) {
> +             err = -ENOMEM;
> +             goto ex_resize;
> +     }
> +
> +     if (udata)
> +             mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift,
> +                                  in->pas, 0);
> +     else
> +             mlx5_fill_page_array(&cq->resize_buf->buf, in->pas);
> +
> +     in->field_select = cpu_to_be32(MLX5_MODIFY_CQ_MASK_LOG_SIZE  |
> +                                    MLX5_MODIFY_CQ_MASK_PG_OFFSET |
> +                                    MLX5_MODIFY_CQ_MASK_PG_SIZE);
> +     in->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
> +     in->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;
> +     in->ctx.page_offset = 0;
> +     in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(entries) << 24);
> +     in->hdr.opmod = cpu_to_be16(MLX5_CQ_OPMOD_RESIZE);
> +     in->cqn = cpu_to_be32(cq->mcq.cqn);
> +
> +     err = mlx5_core_modify_cq(&dev->mdev, &cq->mcq, in, inlen);
> +     if (err)
> +             goto ex_alloc;
> +
> +     if (udata) {
> +             cq->ibcq.cqe = entries - 1;
> +             ib_umem_release(cq->buf.umem);
> +             cq->buf.umem = cq->resize_umem;
> +             cq->resize_umem = NULL;
> +     } else {
> +             struct mlx5_ib_cq_buf tbuf;
> +             int resized = 0;
> +
> +             spin_lock_irqsave(&cq->lock, flags);
> +             if (cq->resize_buf) {
> +                     err = copy_resize_cqes(cq);
> +                     if (!err) {
> +                             tbuf = cq->buf;
> +                             cq->buf = *cq->resize_buf;
> +                             kfree(cq->resize_buf);
> +                             cq->resize_buf = NULL;
> +                             resized = 1;
> +                     }
> +             }
> +             cq->ibcq.cqe = entries - 1;
> +             spin_unlock_irqrestore(&cq->lock, flags);
> +             if (resized)
> +                     free_cq_buf(dev, &tbuf);
> +     }
> +     mutex_unlock(&cq->resize_mutex);
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Is everything in this section really critical?
For example, allocating and setting the 'in' structure, or releasing the
resources, could probably be moved outside the mutex-protected section?

> +
> +     mlx5_vfree(in);
> +     return 0;
> +
> +ex_alloc:
> +     mlx5_vfree(in);
> +
> +ex_resize:
> +     if (udata)
> +             un_resize_user(cq);
> +     else
> +             un_resize_kernel(dev, cq);
> +ex:
> +     mutex_unlock(&cq->resize_mutex);
> +     return err;
>  }
>  

>  
> 
>  int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
> -                     struct mlx5_modify_cq_mbox_in *in)
> +                     struct mlx5_modify_cq_mbox_in *in, int in_sz)
                                                            ^^^^^^^^^^

Should this probably be 'unsigned'? Or 'size_t'?

>  {
>       struct mlx5_modify_cq_mbox_out out;
>       int err;
>  
>       memset(&out, 0, sizeof(out));
>       in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MODIFY_CQ);
> -     err = mlx5_cmd_exec(dev, in, sizeof(*in), &out, sizeof(out));
> +     err = mlx5_cmd_exec(dev, in, in_sz, &out, sizeof(out));
>       if (err)
>               return err;
>  
> @@ -158,7 +166,7 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, 
> struct mlx5_core_cq *cq);
>  int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
>                      struct mlx5_query_cq_mbox_out *out);
>  int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
> -                     struct mlx5_modify_cq_mbox_in *in);
> +                     struct mlx5_modify_cq_mbox_in *in, int in_sz);
                                                            ^^^^^^^^^^
Same here.

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
> index dbb03ca..87e2371 100644
> --- a/include/linux/mlx5/device.h
> +++ b/include/linux/mlx5/device.h
> @@ -710,6 +711,7 @@ struct mlx5_modify_cq_mbox_in {
>  
>  struct mlx5_modify_cq_mbox_out {
>       struct mlx5_outbox_hdr  hdr;
> +     u8                      rsvd[8];
>  };
>  
>  struct mlx5_enable_hca_mbox_in {
> 

It's not clear why 8 bytes are needed here.

Regards.

-- 
Yann Droneaud
OPTEYA


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to