Switch firmware vq query command to be issued via the async API to allow future parallelization.
exec_virtqueue_async_cmds() is a generic execution function that will be used to issue other async operations as well. Handling of throttled commands is built in. For now the command is still serial but the infrastructure is there to issue commands in parallel, including ratelimiting the number of issued async commands to firmware. A later patch will switch to issuing more commands at a time. Signed-off-by: Dragos Tatulea <dtatu...@nvidia.com> Reviewed-by: Tariq Toukan <tar...@nvidia.com> --- drivers/vdpa/mlx5/core/mlx5_vdpa.h | 2 + drivers/vdpa/mlx5/net/mlx5_vnet.c | 181 +++++++++++++++++++++++++---- 2 files changed, 161 insertions(+), 22 deletions(-) diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h index 424d445ebee4..12136163d8ad 100644 --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h @@ -103,6 +103,8 @@ struct mlx5_vdpa_dev { struct workqueue_struct *wq; unsigned int group2asid[MLX5_VDPA_NUMVQ_GROUPS]; bool suspended; + + struct mlx5_async_ctx async_ctx; }; int mlx5_vdpa_create_tis(struct mlx5_vdpa_dev *mvdev, void *in, u32 *tisn); diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 12133e5d1285..be8df9d9f4df 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -1184,40 +1184,173 @@ struct mlx5_virtq_attr { u16 used_index; }; -static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, - struct mlx5_virtq_attr *attr) -{ - int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out); - u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {}; +struct mlx5_virtqueue_query_mem { + u8 in[MLX5_ST_SZ_BYTES(query_virtio_net_q_in)]; + u8 out[MLX5_ST_SZ_BYTES(query_virtio_net_q_out)]; +}; + +struct mlx5_vdpa_async_virtqueue_cmd { + int err; + struct mlx5_async_work cb_work; + struct completion cmd_done; + + void *in; + size_t inlen; + void *out; - void *obj_context; - void *cmd_hdr; + size_t outlen; + + union { + struct mlx5_virtqueue_query_mem query; + }; +}; + +static void virtqueue_cmd_callback(int status, struct mlx5_async_work *context) +{ + struct mlx5_vdpa_async_virtqueue_cmd *cmd = + container_of(context, struct mlx5_vdpa_async_virtqueue_cmd, cb_work); + + cmd->err = mlx5_cmd_check(context->ctx->dev, status, cmd->in, cmd->out); + complete(&cmd->cmd_done); +} + +static int issue_async_cmd(struct mlx5_vdpa_net *ndev, + struct mlx5_vdpa_async_virtqueue_cmd *cmds, + int issued, + int *completed) + +{ + struct mlx5_vdpa_async_virtqueue_cmd *cmd = &cmds[issued]; int err; - out = kzalloc(outlen, GFP_KERNEL); - if (!out) - return -ENOMEM; +retry: + err = mlx5_cmd_exec_cb(&ndev->mvdev.async_ctx, + cmd->in, cmd->inlen, + cmd->out, cmd->outlen, + virtqueue_cmd_callback, + &cmd->cb_work); + if (err == -EBUSY) { + if (*completed < issued) { + /* Throttled by own commands: wait for oldest completion. */ + wait_for_completion(&cmds[*completed].cmd_done); + (*completed)++; + + goto retry; + } else { + /* Throttled by external commands: switch to sync api. */ + err = mlx5_cmd_exec(ndev->mvdev.mdev, + cmd->in, cmd->inlen, + cmd->out, cmd->outlen); + if (!err) + (*completed)++; + } + } + + return err; +} + +static int exec_virtqueue_async_cmds(struct mlx5_vdpa_net *ndev, + struct mlx5_vdpa_async_virtqueue_cmd *cmds, + int num_cmds) +{ + int completed = 0; + int issued = 0; + int err = 0; + + for (int i = 0; i < num_cmds; i++) + init_completion(&cmds[i].cmd_done); + + while (issued < num_cmds) { + + err = issue_async_cmd(ndev, cmds, issued, &completed); + if (err) { + mlx5_vdpa_err(&ndev->mvdev, "error issuing command %d of %d: %d\n", + issued, num_cmds, err); + break; + } + + issued++; + } + + while (completed < issued) + wait_for_completion(&cmds[completed++].cmd_done); - cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr); + return err; +} + +static void fill_query_virtqueue_cmd(struct mlx5_vdpa_net *ndev, + struct mlx5_vdpa_virtqueue *mvq, + struct mlx5_vdpa_async_virtqueue_cmd *cmd) +{ + void *cmd_hdr; + + cmd->in = &cmd->query.in; + cmd->inlen = sizeof(cmd->query.in); + cmd->out = &cmd->query.out; + cmd->outlen = sizeof(cmd->query.out); + + cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, cmd->query.in, general_obj_in_cmd_hdr); MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT); MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q); MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id); MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); - err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen); - if (err) - goto err_cmd; +} + +static void query_virtqueue_end(struct mlx5_vdpa_net *ndev, + struct mlx5_vdpa_async_virtqueue_cmd *cmd, + struct mlx5_virtq_attr *attr) +{ + void *obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, cmd->query.out, obj_context); - obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context); memset(attr, 0, sizeof(*attr)); attr->state = MLX5_GET(virtio_net_q_object, obj_context, state); attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index); attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index); - kfree(out); - return 0; +} -err_cmd: - kfree(out); +static int query_virtqueues(struct mlx5_vdpa_net *ndev, + int start_vq, + int num_vqs, + struct mlx5_virtq_attr *attrs) +{ + struct mlx5_vdpa_dev *mvdev = &ndev->mvdev; + struct mlx5_vdpa_async_virtqueue_cmd *cmds; + int err = 0; + + WARN(start_vq + num_vqs > mvdev->max_vqs, "query vq range invalid [%d, %d), max_vqs: %u\n", + start_vq, start_vq + num_vqs, mvdev->max_vqs); + + cmds = kvcalloc(num_vqs, sizeof(*cmds), GFP_KERNEL); + if (!cmds) + return -ENOMEM; + + for (int i = 0; i < num_vqs; i++) + fill_query_virtqueue_cmd(ndev, &ndev->vqs[start_vq + i], &cmds[i]); + + err = exec_virtqueue_async_cmds(ndev, cmds, num_vqs); + if (err) { + mlx5_vdpa_err(mvdev, "error issuing query cmd for vq range [%d, %d): %d\n", + start_vq, start_vq + num_vqs, err); + goto done; + } + + for (int i = 0; i < num_vqs; i++) { + struct mlx5_vdpa_async_virtqueue_cmd *cmd = &cmds[i]; + int vq_idx = start_vq + i; + + if (cmd->err) { + mlx5_vdpa_err(mvdev, "query vq %d failed, err: %d\n", vq_idx, err); + if (!err) + err = cmd->err; + continue; + } + + query_virtqueue_end(ndev, cmd, &attrs[i]); + } + +done: + kfree(cmds); return err; } @@ -1542,7 +1675,7 @@ static int suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mv return err; } - err = query_virtqueue(ndev, mvq, &attr); + err = query_virtqueues(ndev, mvq->index, 1, &attr); if (err) { mlx5_vdpa_err(&ndev->mvdev, "failed to query virtqueue, err: %d\n", err); return err; @@ -2528,7 +2661,7 @@ static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa return 0; } - err = query_virtqueue(ndev, mvq, &attr); + err = query_virtqueues(ndev, mvq->index, 1, &attr); if (err) { mlx5_vdpa_err(mvdev, "failed to query virtqueue\n"); return err; @@ -2879,7 +3012,7 @@ static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqu int err; if (mvq->initialized) { - err = query_virtqueue(ndev, mvq, &attr); + err = query_virtqueues(ndev, mvq->index, 1, &attr); if (err) return err; } @@ -3854,6 +3987,8 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, ndev->rqt_size = 1; } + mlx5_cmd_init_async_ctx(mdev, &mvdev->async_ctx); + ndev->mvdev.mlx_features = device_features; mvdev->vdev.dma_dev = &mdev->pdev->dev; err = mlx5_vdpa_alloc_resources(&ndev->mvdev); @@ -3935,6 +4070,8 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device * mvdev->wq = NULL; destroy_workqueue(wq); mgtdev->ndev = NULL; + + mlx5_cmd_cleanup_async_ctx(&mvdev->async_ctx); } static const struct vdpa_mgmtdev_ops mdev_ops = { -- 2.45.2