On Thu, Nov 08, 2018 at 09:10:15PM +0200, Leon Romanovsky wrote:
> From: Moni Shoua <mo...@mellanox.com>
> 
> Telling the HCA that page fault handling is done and QP can resume
> its flow is done in the context of the page fault handler. This blocks
> the handling of the next work in queue without a need.
> Call the PAGE_FAULT_RESUME command in an asynchronous manner and free
> the workqueue to pick the next work item for handling. All tasks that
> were executed after PAGE_FAULT_RESUME need to be done now
> in the callback of the asynchronous command mechanism.
> 
> Signed-off-by: Moni Shoua <mo...@mellanox.com>
> Signed-off-by: Leon Romanovsky <leo...@mellanox.com>
>  drivers/infiniband/hw/mlx5/odp.c | 110 +++++++++++++++++++++++++------
>  include/linux/mlx5/driver.h      |   3 +
>  2 files changed, 94 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/infiniband/hw/mlx5/odp.c 
> b/drivers/infiniband/hw/mlx5/odp.c
> index abce55b8b9ba..0c4f469cdd5b 100644
> +++ b/drivers/infiniband/hw/mlx5/odp.c
> @@ -298,20 +298,78 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev 
> *dev)
>       return;
>  }
>  
> +struct pfault_resume_cb_ctx {
> +     struct mlx5_ib_dev *dev;
> +     struct mlx5_core_rsc_common *res;
> +     struct mlx5_pagefault *pfault;
> +};
> +
> +static void page_fault_resume_callback(int status, void *context)
> +{
> +     struct pfault_resume_cb_ctx *ctx = context;
> +     struct mlx5_pagefault *pfault = ctx->pfault;
> +
> +     if (status)
> +             mlx5_ib_err(ctx->dev, "Resolve the page fault failed with 
> status %d\n",
> +                         status);
> +
> +     if (ctx->res)
> +             mlx5_core_res_put(ctx->res);
> +     kfree(pfault);
> +     kfree(ctx);
> +}
> +
>  static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
> +                                   struct mlx5_core_rsc_common *res,
>                                     struct mlx5_pagefault *pfault,
> -                                   int error)
> +                                   int error,
> +                                   bool async)
>  {
> +     int ret = 0;
> +     u32 *out = pfault->out_pf_resume;
> +     u32 *in = pfault->in_pf_resume;
> +     u32 token = pfault->token;
>       int wq_num = pfault->event_subtype == MLX5_PFAULT_SUBTYPE_WQE ?
> -                  pfault->wqe.wq_num : pfault->token;
> -     int ret = mlx5_core_page_fault_resume(dev->mdev,
> -                                           pfault->token,
> -                                           wq_num,
> -                                           pfault->type,
> -                                           error);
> -     if (ret)
> -             mlx5_ib_err(dev, "Failed to resolve the page fault on WQ 
> 0x%x\n",
> -                         wq_num);
> +             pfault->wqe.wq_num : pfault->token;
> +     u8 type = pfault->type;
> +     struct pfault_resume_cb_ctx *ctx = NULL;
> +
> +     if (async)
> +             ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);

Why not allocate this ctx as part of the mlx5_pagefault and avoid
this allocation-failure handling strategy entirely?

Jason

Reply via email to