On 11/3/24 22:31, Matthew Brost wrote:
On Wed, Oct 30, 2024 at 08:51:44AM -0700, Lizhi Hou wrote:
Add interfaces for user applications to submit commands and wait for their
completion.

Co-developed-by: Min Ma <min...@amd.com>
Signed-off-by: Min Ma <min...@amd.com>
Signed-off-by: Lizhi Hou <lizhi....@amd.com>
---
  drivers/accel/amdxdna/aie2_ctx.c              | 664 +++++++++++++++++-
  drivers/accel/amdxdna/aie2_message.c          | 343 +++++++++
  drivers/accel/amdxdna/aie2_pci.c              |   5 +
  drivers/accel/amdxdna/aie2_pci.h              |  35 +
  drivers/accel/amdxdna/aie2_psp.c              |   2 +
  drivers/accel/amdxdna/aie2_smu.c              |   2 +
  drivers/accel/amdxdna/amdxdna_ctx.c           | 330 ++++++++-
  drivers/accel/amdxdna/amdxdna_ctx.h           | 111 +++
  drivers/accel/amdxdna/amdxdna_gem.c           |  10 +
  drivers/accel/amdxdna/amdxdna_gem.h           |   1 +
  .../accel/amdxdna/amdxdna_mailbox_helper.c    |   5 +
  drivers/accel/amdxdna/amdxdna_pci_drv.c       |   5 +
  drivers/accel/amdxdna/amdxdna_pci_drv.h       |   4 +
  drivers/accel/amdxdna/amdxdna_sysfs.c         |   5 +
  drivers/accel/amdxdna/npu1_regs.c             |   1 +
  drivers/accel/amdxdna/npu2_regs.c             |   1 +
  drivers/accel/amdxdna/npu4_regs.c             |   1 +
  drivers/accel/amdxdna/npu5_regs.c             |   1 +
  include/trace/events/amdxdna.h                |  41 ++
  include/uapi/drm/amdxdna_accel.h              |  38 +
  20 files changed, 1596 insertions(+), 9 deletions(-)

diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
index 617fc05077d9..c3ac668e16ab 100644
--- a/drivers/accel/amdxdna/aie2_ctx.c
+++ b/drivers/accel/amdxdna/aie2_ctx.c
@@ -8,8 +8,12 @@
  #include <drm/drm_gem.h>
  #include <drm/drm_gem_shmem_helper.h>
  #include <drm/drm_print.h>
+#include <drm/drm_syncobj.h>
+#include <linux/hmm.h>
  #include <linux/types.h>
+#include <trace/events/amdxdna.h>
+#include "aie2_msg_priv.h"
  #include "aie2_pci.h"
  #include "aie2_solver.h"
  #include "amdxdna_ctx.h"
@@ -17,6 +21,337 @@
  #include "amdxdna_mailbox.h"
  #include "amdxdna_pci_drv.h"
+bool force_cmdlist;
+module_param(force_cmdlist, bool, 0600);
+MODULE_PARM_DESC(force_cmdlist, "Force use command list (Default false)");
+
+#define HWCTX_MAX_TIMEOUT      60000 /* milliseconds */
+
+static struct amdxdna_sched_job *
+aie2_hwctx_get_job(struct amdxdna_hwctx *hwctx, u64 seq)
+{
+       int idx;
+
+       /* Special sequence number for the oldest fence, if it exists */
+       if (seq == AMDXDNA_INVALID_CMD_HANDLE) {
+               idx = get_job_idx(hwctx->priv->seq);
+               goto out;
+       }
+
+       if (seq >= hwctx->priv->seq)
+               return ERR_PTR(-EINVAL);
+
+       if (seq + HWCTX_MAX_CMDS < hwctx->priv->seq)
+               return NULL;
+
+       idx = get_job_idx(seq);
+
+out:
+       return hwctx->priv->pending[idx];
+}
+
+/* The bad_job is used by aie2_sched_job_timedout(); otherwise, pass NULL */
+static void aie2_hwctx_stop(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx,
+                           struct drm_sched_job *bad_job)
+{
+       drm_sched_stop(&hwctx->priv->sched, bad_job);
+       aie2_destroy_context(xdna->dev_handle, hwctx);
+}
+
+static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx)
+{
+       struct amdxdna_gem_obj *heap = hwctx->priv->heap;
+       int ret;
+
+       ret = aie2_create_context(xdna->dev_handle, hwctx);
+       if (ret) {
+               XDNA_ERR(xdna, "Create hwctx failed, ret %d", ret);
+               goto out;
+       }
+
+       ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id,
+                               heap->mem.userptr, heap->mem.size);
+       if (ret) {
+               XDNA_ERR(xdna, "Map host buf failed, ret %d", ret);
+               goto out;
+       }
+
+       if (hwctx->status != HWCTX_STAT_READY) {
+               XDNA_DBG(xdna, "hwctx is not ready, status %d", hwctx->status);
+               goto out;
+       }
+
+       ret = aie2_config_cu(hwctx);
+       if (ret) {
+               XDNA_ERR(xdna, "Config cu failed, ret %d", ret);
+               goto out;
+       }
+
+out:
+       drm_sched_start(&hwctx->priv->sched);
+       XDNA_DBG(xdna, "%s restarted, ret %d", hwctx->name, ret);
+       return ret;
+}
+
+void aie2_stop_ctx_by_col_map(struct amdxdna_client *client, u32 col_map)
+{
+       struct amdxdna_dev *xdna = client->xdna;
+       struct amdxdna_hwctx *hwctx;
+       int next = 0;
+
+       drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+       mutex_lock(&client->hwctx_lock);
+       idr_for_each_entry_continue(&client->hwctx_idr, hwctx, next) {
+               /* check if the HW context uses the error column */
+               if (!(col_map & amdxdna_hwctx_col_map(hwctx)))
+                       continue;
+
+               aie2_hwctx_stop(xdna, hwctx, NULL);
+               hwctx->old_status = hwctx->status;
+               hwctx->status = HWCTX_STAT_STOP;
+               XDNA_DBG(xdna, "Stop %s", hwctx->name);
+       }
+       mutex_unlock(&client->hwctx_lock);
+}
+
+void aie2_restart_ctx(struct amdxdna_client *client)
+{
+       struct amdxdna_dev *xdna = client->xdna;
+       struct amdxdna_hwctx *hwctx;
+       int next = 0;
+
+       drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+       mutex_lock(&client->hwctx_lock);
+       idr_for_each_entry_continue(&client->hwctx_idr, hwctx, next) {
+               if (hwctx->status != HWCTX_STAT_STOP)
+                       continue;
+
+               hwctx->status = hwctx->old_status;
+               XDNA_DBG(xdna, "Resetting %s", hwctx->name);
+               aie2_hwctx_restart(xdna, hwctx);
+       }
+       mutex_unlock(&client->hwctx_lock);
+}
+
+static int aie2_hwctx_wait_for_idle(struct amdxdna_hwctx *hwctx)
+{
+       struct amdxdna_sched_job *job;
+
+       mutex_lock(&hwctx->priv->io_lock);
+       if (!hwctx->priv->seq) {
+               mutex_unlock(&hwctx->priv->io_lock);
+               return 0;
+       }
+
+       job = aie2_hwctx_get_job(hwctx, hwctx->priv->seq - 1);
+       if (IS_ERR_OR_NULL(job)) {
+               mutex_unlock(&hwctx->priv->io_lock);
+               XDNA_WARN(hwctx->client->xdna, "Corrupted pending list");
+               return 0;
+       }
+       mutex_unlock(&hwctx->priv->io_lock);
+
+       wait_event(hwctx->priv->job_free_wq, !job->fence);
+
+       return 0;
+}
+
+static void
+aie2_sched_notify(struct amdxdna_sched_job *job)
+{
+       struct dma_fence *fence = job->fence;
+
+       job->hwctx->priv->completed++;
+       dma_fence_signal(fence);
+       trace_xdna_job(&job->base, job->hwctx->name, "signaled fence", job->seq);
+       dma_fence_put(fence);
+       mmput(job->mm);
+       amdxdna_job_put(job);
+}
+
+static int
+aie2_sched_resp_handler(void *handle, const u32 *data, size_t size)
+{
+       struct amdxdna_sched_job *job = handle;
+       struct amdxdna_gem_obj *cmd_abo;
+       u32 ret = 0;
+       u32 status;
+
+       cmd_abo = job->cmd_bo;
+
+       if (unlikely(!data))
+               goto out;
+
+       if (unlikely(size != sizeof(u32))) {
+               amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
+               ret = -EINVAL;
+               goto out;
+       }
+
+       status = *data;
+       XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status);
+       if (status == AIE2_STATUS_SUCCESS)
+               amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED);
+       else
+               amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ERROR);
+
+out:
+       aie2_sched_notify(job);
+       return ret;
+}
+
+static int
+aie2_sched_nocmd_resp_handler(void *handle, const u32 *data, size_t size)
+{
+       struct amdxdna_sched_job *job = handle;
+       u32 ret = 0;
+       u32 status;
+
+       if (unlikely(!data))
+               goto out;
+
+       if (unlikely(size != sizeof(u32))) {
+               ret = -EINVAL;
+               goto out;
+       }
+
+       status = *data;
+       XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status);
+
+out:
+       aie2_sched_notify(job);
+       return ret;
+}
+
+static int
+aie2_sched_cmdlist_resp_handler(void *handle, const u32 *data, size_t size)
+{
+       struct amdxdna_sched_job *job = handle;
+       struct amdxdna_gem_obj *cmd_abo;
+       struct cmd_chain_resp *resp;
+       struct amdxdna_dev *xdna;
+       u32 fail_cmd_status;
+       u32 fail_cmd_idx;
+       u32 ret = 0;
+
+       cmd_abo = job->cmd_bo;
+       if (unlikely(!data) || unlikely(size != sizeof(u32) * 3)) {
+               amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
+               ret = -EINVAL;
+               goto out;
+       }
+
+       resp = (struct cmd_chain_resp *)data;
+       xdna = job->hwctx->client->xdna;
+       XDNA_DBG(xdna, "Status 0x%x", resp->status);
+       if (resp->status == AIE2_STATUS_SUCCESS) {
+               amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED);
+               goto out;
+       }
+
+       /* Slow path to handle error, read from ringbuf on BAR */
+       fail_cmd_idx = resp->fail_cmd_idx;
+       fail_cmd_status = resp->fail_cmd_status;
+       XDNA_DBG(xdna, "Failed cmd idx %d, status 0x%x",
+                fail_cmd_idx, fail_cmd_status);
+
+       if (fail_cmd_status == AIE2_STATUS_SUCCESS) {
+               amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
+               ret = -EINVAL;
+               goto out;
+       }
+       amdxdna_cmd_set_state(cmd_abo, fail_cmd_status);
+
+       if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN) {
+               struct amdxdna_cmd_chain *cc = amdxdna_cmd_get_payload(cmd_abo, NULL);
+
+               cc->error_index = fail_cmd_idx;
+               if (cc->error_index >= cc->command_count)
+                       cc->error_index = 0;
+       }
+out:
+       aie2_sched_notify(job);
+       return ret;
+}
+
+static struct dma_fence *
+aie2_sched_job_run(struct drm_sched_job *sched_job)
+{
+       struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
+       struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
+       struct amdxdna_hwctx *hwctx = job->hwctx;
+       struct dma_fence *fence;
+       int ret;
+
+       if (!mmget_not_zero(job->mm))
+               return ERR_PTR(-ESRCH);
+
+       kref_get(&job->refcnt);
+       fence = dma_fence_get(job->fence);
+
+       if (unlikely(!cmd_abo)) {
+               ret = aie2_sync_bo(hwctx, job, aie2_sched_nocmd_resp_handler);
+               goto out;
+       }
+
+       amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_NEW);
+
+       if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN)
+               ret = aie2_cmdlist_multi_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler);
+       else if (force_cmdlist)
+               ret = aie2_cmdlist_single_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler);
+       else
+               ret = aie2_execbuf(hwctx, job, aie2_sched_resp_handler);
+
+out:
+       if (ret) {
+               dma_fence_put(job->fence);
+               amdxdna_job_put(job);
+               mmput(job->mm);
+               fence = ERR_PTR(ret);
+       }
+       trace_xdna_job(sched_job, hwctx->name, "sent to device", job->seq);
+
+       return fence;
+}
+
+static void aie2_sched_job_free(struct drm_sched_job *sched_job)
+{
+       struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
+       struct amdxdna_hwctx *hwctx = job->hwctx;
+
+       trace_xdna_job(sched_job, hwctx->name, "job free", job->seq);
+       drm_sched_job_cleanup(sched_job);
+       job->fence = NULL;
+       amdxdna_job_put(job);
+
+       wake_up(&hwctx->priv->job_free_wq);
+}
+
+static enum drm_gpu_sched_stat
+aie2_sched_job_timedout(struct drm_sched_job *sched_job)
+{
+       struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
+       struct amdxdna_hwctx *hwctx = job->hwctx;
+       struct amdxdna_dev *xdna;
+
+       xdna = hwctx->client->xdna;
+       trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq);
+       mutex_lock(&xdna->dev_lock);
+       aie2_hwctx_stop(xdna, hwctx, sched_job);
+
+       aie2_hwctx_restart(xdna, hwctx);
+       mutex_unlock(&xdna->dev_lock);
+
+       return DRM_GPU_SCHED_STAT_NOMINAL;
+}
+
+const struct drm_sched_backend_ops sched_ops = {
+       .run_job = aie2_sched_job_run,
+       .free_job = aie2_sched_job_free,
+       .timedout_job = aie2_sched_job_timedout,
+};
+
  static int aie2_hwctx_col_list(struct amdxdna_hwctx *hwctx)
  {
        struct amdxdna_dev *xdna = hwctx->client->xdna;
@@ -126,13 +461,66 @@ static void aie2_release_resource(struct amdxdna_hwctx *hwctx)
                XDNA_ERR(xdna, "Release AIE resource failed, ret %d", ret);
  }
+static int aie2_ctx_syncobj_create(struct amdxdna_hwctx *hwctx)
+{
+       struct amdxdna_dev *xdna = hwctx->client->xdna;
+       struct drm_file *filp = hwctx->client->filp;
+       struct drm_syncobj *syncobj;
+       u32 hdl;
+       int ret;
+
+       hwctx->syncobj_hdl = AMDXDNA_INVALID_FENCE_HANDLE;
+
+       ret = drm_syncobj_create(&syncobj, DRM_SYNCOBJ_CREATE_SIGNALED, NULL);
+       if (ret) {
+               XDNA_ERR(xdna, "Create ctx syncobj failed, ret %d", ret);
+               return ret;
+       }
+       ret = drm_syncobj_get_handle(filp, syncobj, &hdl);
+       if (ret) {
+               drm_syncobj_put(syncobj);
+               XDNA_ERR(xdna, "Create ctx syncobj handle failed, ret %d", ret);
+               return ret;
+       }
+       hwctx->priv->syncobj = syncobj;
+       hwctx->syncobj_hdl = hdl;
+
+       return 0;
+}
+
+static void aie2_ctx_syncobj_destroy(struct amdxdna_hwctx *hwctx)
+{
+       /*
+        * The syncobj_hdl is owned by user space and will be cleaned up
+        * separately.
+        */
+       drm_syncobj_put(hwctx->priv->syncobj);
+}
+
+static void aie2_ctx_syncobj_add_fence(struct amdxdna_hwctx *hwctx,
+                                      struct dma_fence *ofence, u64 seq)
+{
+       struct drm_syncobj *syncobj = hwctx->priv->syncobj;
+       struct dma_fence_chain *chain;
+
+       if (!syncobj)
+               return;
+
+       chain = dma_fence_chain_alloc();
+       if (!chain)
+               return;
+
+       drm_syncobj_add_point(syncobj, chain, ofence, seq);
+}
+
  int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
  {
        struct amdxdna_client *client = hwctx->client;
        struct amdxdna_dev *xdna = client->xdna;
+       struct drm_gpu_scheduler *sched;
        struct amdxdna_hwctx_priv *priv;
        struct amdxdna_gem_obj *heap;
-       int ret;
+       int i, ret;

       priv = kzalloc(sizeof(*hwctx->priv), GFP_KERNEL);
        if (!priv)
@@ -157,10 +545,48 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
                goto put_heap;
        }
+
+       for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) {
+               struct amdxdna_gem_obj *abo;
+               struct amdxdna_drm_create_bo args = {
+                       .flags = 0,
+                       .type = AMDXDNA_BO_DEV,
+                       .vaddr = 0,
+                       .size = MAX_CHAIN_CMDBUF_SIZE,
+               };
+
+               abo = amdxdna_drm_alloc_dev_bo(&xdna->ddev, &args, client->filp, true);
+               if (IS_ERR(abo)) {
+                       ret = PTR_ERR(abo);
+                       goto free_cmd_bufs;
+               }
+
+               XDNA_DBG(xdna, "Command buf %d addr 0x%llx size 0x%lx",
+                        i, abo->mem.dev_addr, abo->mem.size);
+               priv->cmd_buf[i] = abo;
+       }
+
+       sched = &priv->sched;
+       mutex_init(&priv->io_lock);
+       ret = drm_sched_init(sched, &sched_ops, NULL, DRM_SCHED_PRIORITY_COUNT,
+                            HWCTX_MAX_CMDS, 0, msecs_to_jiffies(HWCTX_MAX_TIMEOUT),
+                            NULL, NULL, hwctx->name, xdna->ddev.dev);
+       if (ret) {
+               XDNA_ERR(xdna, "Failed to init DRM scheduler. ret %d", ret);
+               goto free_cmd_bufs;
+       }
+
+       ret = drm_sched_entity_init(&priv->entity, DRM_SCHED_PRIORITY_NORMAL,
+                                   &sched, 1, NULL);
+       if (ret) {
+               XDNA_ERR(xdna, "Failed to initial sched entiry. ret %d", ret);
+               goto free_sched;
+       }
+       init_waitqueue_head(&priv->job_free_wq);
+
        ret = aie2_hwctx_col_list(hwctx);
        if (ret) {
                XDNA_ERR(xdna, "Create col list failed, ret %d", ret);
-               goto unpin;
+               goto free_entity;
        }

       ret = aie2_alloc_resource(hwctx);
@@ -175,6 +601,13 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
                XDNA_ERR(xdna, "Map host buffer failed, ret %d", ret);
                goto release_resource;
        }
+
+       ret = aie2_ctx_syncobj_create(hwctx);
+       if (ret) {
+               XDNA_ERR(xdna, "Create syncobj failed, ret %d", ret);
+               goto release_resource;
+       }
+
        hwctx->status = HWCTX_STAT_INIT;

       XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name);
@@ -185,7 +618,16 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
        aie2_release_resource(hwctx);
  free_col_list:
        kfree(hwctx->col_list);
-unpin:
+free_entity:
+       drm_sched_entity_destroy(&priv->entity);
+free_sched:
+       drm_sched_fini(&priv->sched);
+free_cmd_bufs:
+       for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) {
+               if (!priv->cmd_buf[i])
+                       continue;
+               drm_gem_object_put(to_gobj(priv->cmd_buf[i]));
+       }
        amdxdna_gem_unpin(heap);
  put_heap:
        drm_gem_object_put(to_gobj(heap));
@@ -196,11 +638,44 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
  {
+       struct amdxdna_sched_job *job;
+       struct amdxdna_dev *xdna;
+       int idx;
+
+       xdna = hwctx->client->xdna;
+       aie2_ctx_syncobj_destroy(hwctx);
+       drm_sched_wqueue_stop(&hwctx->priv->sched);
+
+       /* Now the scheduler will not send commands to the device. */
        aie2_release_resource(hwctx);
+
+       /*
+        * All submitted commands are aborted.
+        * Restart scheduler queues to clean up jobs. aie2_sched_job_run()
+        * will return NODEV if it is called.
+        */
+       drm_sched_wqueue_start(&hwctx->priv->sched);
+
+       aie2_hwctx_wait_for_idle(hwctx);
+       drm_sched_entity_destroy(&hwctx->priv->entity);
+       drm_sched_fini(&hwctx->priv->sched);
+
+       for (idx = 0; idx < HWCTX_MAX_CMDS; idx++) {
+               job = hwctx->priv->pending[idx];
+               if (!job)
+                       continue;
+
+               dma_fence_put(job->out_fence);
+               amdxdna_job_put(job);
+       }
+       XDNA_DBG(xdna, "%s sequence number %lld", hwctx->name, hwctx->priv->seq);
+
+       for (idx = 0; idx < ARRAY_SIZE(hwctx->priv->cmd_buf); idx++)
+               drm_gem_object_put(to_gobj(hwctx->priv->cmd_buf[idx]));
        amdxdna_gem_unpin(hwctx->priv->heap);
        drm_gem_object_put(to_gobj(hwctx->priv->heap));
+
+       mutex_destroy(&hwctx->priv->io_lock);
        kfree(hwctx->col_list);
        kfree(hwctx->priv);
        kfree(hwctx->cus);
@@ -267,3 +742,186 @@ int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *bu
                return -EOPNOTSUPP;
        }
  }
+
+static int aie2_populate_range(struct amdxdna_gem_obj *abo)
+{
+       struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
+       struct mm_struct *mm = abo->mem.notifier.mm;
+       struct hmm_range range = { 0 };
+       unsigned long timeout;
+       int ret;
+
+       XDNA_INFO_ONCE(xdna, "populate memory range %llx size %lx",
+                      abo->mem.userptr, abo->mem.size);
+       range.notifier = &abo->mem.notifier;
+       range.start = abo->mem.userptr;
+       range.end = abo->mem.userptr + abo->mem.size;
+       range.hmm_pfns = abo->mem.pfns;
+       range.default_flags = HMM_PFN_REQ_FAULT;
+
+       if (!mmget_not_zero(mm))
+               return -EFAULT;
+
+       timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
+again:
+       range.notifier_seq = mmu_interval_read_begin(&abo->mem.notifier);
+       mmap_read_lock(mm);
+       ret = hmm_range_fault(&range);
+       mmap_read_unlock(mm);
+       if (ret) {
+               if (time_after(jiffies, timeout)) {
+                       ret = -ETIME;
+                       goto put_mm;
+               }
+
+               if (ret == -EBUSY)
+                       goto again;
+
+               goto put_mm;
+       }
+
+       mutex_lock(&abo->mem.notify_lock);
+       if (mmu_interval_read_retry(&abo->mem.notifier, range.notifier_seq)) {
+               mutex_unlock(&abo->mem.notify_lock);
+               goto again;
+       }
+       abo->mem.map_invalid = false;
+       mutex_unlock(&abo->mem.notify_lock);
+
+put_mm:
+       mmput(mm);
+       return ret;
+}
+
+static int aie2_hwctx_push_job(struct amdxdna_sched_job *job, u64 *seq)
+{
+       struct amdxdna_hwctx *hwctx = job->hwctx;
+       struct amdxdna_sched_job *other;
+       struct dma_fence *fence;
+       long ret;
+       int idx;
+
+again:
+       mutex_lock(&hwctx->priv->io_lock);
+       idx = get_job_idx(hwctx->priv->seq);
+       /* When the pending list is full, hwctx->seq points to the oldest fence */
+       other = hwctx->priv->pending[idx];
+       if (other && !dma_fence_is_signaled(other->out_fence)) {
+               fence = dma_fence_get(other->out_fence);
+               mutex_unlock(&hwctx->priv->io_lock);
+
+               ret = dma_fence_wait_timeout(fence, true, MAX_SCHEDULE_TIMEOUT);
+               dma_fence_put(fence);
+               if (!ret)
+                       return -ETIME;
+               else if (ret < 0)
+                       return ret;
+               goto again;
+       }
+
+       if (other) {
+               dma_fence_put(other->out_fence);
+               amdxdna_job_put(other);
+       }
+
+       hwctx->priv->pending[idx] = job;
+       job->seq = hwctx->priv->seq++;
+       *seq = job->seq;
+       kref_get(&job->refcnt);
+
+       fence = dma_fence_get(job->out_fence);
+       drm_sched_entity_push_job(&job->base);
+       mutex_unlock(&hwctx->priv->io_lock);
+
+       aie2_ctx_syncobj_add_fence(hwctx, fence, *seq);
+       dma_fence_put(fence);
+       return 0;
+}
+
+int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq)
+{
+       struct amdxdna_dev *xdna = hwctx->client->xdna;
+       struct ww_acquire_ctx acquire_ctx;
+       struct amdxdna_gem_obj *abo;
+       unsigned long timeout = 0;
+       int ret, i;
+
+       ret = drm_sched_job_init(&job->base, &hwctx->priv->entity, 1, hwctx);
+       if (ret) {
+               XDNA_ERR(xdna, "DRM job init failed, ret %d", ret);
+               return ret;
+       }
+
+       drm_sched_job_arm(&job->base);
Again, drive-by comments.

This looks wildly dangerous. drm_sched_job_arm() should typically be called
once, holding all locks, at the point at which you can no longer fail. I get
that you signal the job's fence on failure, but that doesn't seem like a
great idea, nor is it how the scheduler is designed to be used.

The flow typically is:

acquire all locks and setup job...
arm
install fences
push

^^^ With no ability to fail between arm & push.
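
As a rough sketch, that ordering looks something like this (pseudo-driver
code, not your actual API; my_hwctx/my_job, io_lock, pending[] and
MAX_PENDING are placeholders for illustration):

static int submit_sketch(struct my_hwctx *ctx, struct my_job *job, u64 *seq)
{
        int ret;

        /* Init may still fail; nothing has been armed yet. */
        ret = drm_sched_job_init(&job->base, &ctx->entity, 1, ctx);
        if (ret)
                return ret;

        /* Take every lock needed to install fences before arming. */
        mutex_lock(&ctx->io_lock);

        /* Point of no return: once armed, the job must be pushed. */
        drm_sched_job_arm(&job->base);

        /* Install the scheduler fence (pending list, syncobj, ...). */
        *seq = ctx->seq++;
        ctx->pending[*seq % MAX_PENDING] = job;

        /* Push under the same lock; no failure paths in between. */
        drm_sched_entity_push_job(&job->base);

        mutex_unlock(&ctx->io_lock);
        return 0;
}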

Your flow is...

arm
acquire locks...
install fences
drop locks...
acquire different locks...
push
drops different locks...

Seems dangerous, I would reconsider.

Ok, I worked on this and will send out v7 patches to follow the suggested flow.


Thanks,

Lizhi

