On 7/1/26 10:55, Lizhi Hou wrote:
When a debug BO command completes, job->drv_cmd may already have been
freed. Accessing it from aie2_sched_drvcmd_resp_handler() can result in
a use-after-free and memory corruption.

Fix this by introducing reference counting for drv_cmd objects and
transferring ownership to the job while it is in flight. This ensures
that the command remains valid until the completion handler finishes
processing it.

Fixes: 7ea046838021 ("accel/amdxdna: Support firmware debug buffer")
Signed-off-by: Lizhi Hou <[email protected]>
Reviewed-by: Mario Limonciello (AMD) <[email protected]>
---
  drivers/accel/amdxdna/aie2_ctx.c    | 68 +++++++++++++++++++++--------
  drivers/accel/amdxdna/amdxdna_ctx.h |  1 +
  2 files changed, 51 insertions(+), 18 deletions(-)

diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
index 55eb29dece5b..8ec8a4d69b14 100644
--- a/drivers/accel/amdxdna/aie2_ctx.c
+++ b/drivers/accel/amdxdna/aie2_ctx.c
@@ -59,6 +59,18 @@ static bool aie2_tdr_detect(struct amdxdna_dev *xdna)
        return false;
  }
+static void aie2_cmd_release(struct kref *ref)
+{
+       struct amdxdna_drv_cmd *drv_cmd = container_of(ref, struct 
amdxdna_drv_cmd, refcnt);
+
+       kfree(drv_cmd);
+}
+
+static void aie2_cmd_put(struct amdxdna_drv_cmd *drv_cmd)
+{
+       kref_put(&drv_cmd->refcnt, aie2_cmd_release);
+}
+
  static void aie2_job_release(struct kref *ref)
  {
        struct amdxdna_sched_job *job;
@@ -70,6 +82,8 @@ static void aie2_job_release(struct kref *ref)
        wake_up(&job->hwctx->priv->job_free_wq);
        if (job->out_fence)
                dma_fence_put(job->out_fence);
+       if (job->drv_cmd)
+               aie2_cmd_put(job->drv_cmd);
        kfree(job->aie2_job_health);
        kfree(job);
  }
@@ -901,7 +915,7 @@ static int aie2_hwctx_cfg_debug_bo(struct amdxdna_hwctx 
*hwctx, u32 bo_hdl,
  {
        struct amdxdna_client *client = hwctx->client;
        struct amdxdna_dev *xdna = client->xdna;
-       struct amdxdna_drv_cmd cmd = { 0 };
+       struct amdxdna_drv_cmd *cmd;
        struct amdxdna_gem_obj *abo;
        u64 seq;
        int ret;
@@ -912,32 +926,39 @@ static int aie2_hwctx_cfg_debug_bo(struct amdxdna_hwctx 
*hwctx, u32 bo_hdl,
                return -EINVAL;
        }
+ cmd = kzalloc_obj(*cmd);
+       if (!cmd) {
+               ret = -ENOMEM;
+               goto put_obj;
+       }
+       kref_init(&cmd->refcnt);
+
        if (attach) {
                if (abo->assigned_hwctx != AMDXDNA_INVALID_CTX_HANDLE) {
                        ret = -EBUSY;
-                       goto put_obj;
+                       goto put_cmd;
                }
-               cmd.opcode = ATTACH_DEBUG_BO;
+               cmd->opcode = ATTACH_DEBUG_BO;
        } else {
                if (abo->assigned_hwctx != hwctx->id) {
                        ret = -EINVAL;
-                       goto put_obj;
+                       goto put_cmd;
                }
-               cmd.opcode = DETACH_DEBUG_BO;
+               cmd->opcode = DETACH_DEBUG_BO;
        }
- ret = amdxdna_cmd_submit(client, &cmd, AMDXDNA_INVALID_BO_HANDLE,
+       ret = amdxdna_cmd_submit(client, cmd, AMDXDNA_INVALID_BO_HANDLE,
                                 &bo_hdl, 1, hwctx->id, &seq);
        if (ret) {
                XDNA_ERR(xdna, "Submit command failed");
-               goto put_obj;
+               goto put_cmd;
        }
aie2_cmd_wait(hwctx, seq);
-       if (cmd.result) {
-               XDNA_ERR(xdna, "Response failure 0x%x", cmd.result);
+       if (cmd->result) {
+               XDNA_ERR(xdna, "Response failure 0x%x", cmd->result);
                ret = -EINVAL;
-               goto put_obj;
+               goto put_cmd;
        }
if (attach)
@@ -947,6 +968,8 @@ static int aie2_hwctx_cfg_debug_bo(struct amdxdna_hwctx 
*hwctx, u32 bo_hdl,
XDNA_DBG(xdna, "Config debug BO %d to %s", bo_hdl, hwctx->name);
+put_cmd:
+       aie2_cmd_put(cmd);
  put_obj:
        amdxdna_gem_put_obj(abo);
        return ret;
@@ -974,25 +997,32 @@ int aie2_hwctx_sync_debug_bo(struct amdxdna_hwctx *hwctx, 
u32 debug_bo_hdl)
  {
        struct amdxdna_client *client = hwctx->client;
        struct amdxdna_dev *xdna = client->xdna;
-       struct amdxdna_drv_cmd cmd = { 0 };
+       struct amdxdna_drv_cmd *cmd;
        u64 seq;
        int ret;
- cmd.opcode = SYNC_DEBUG_BO;
-       ret = amdxdna_cmd_submit(client, &cmd, AMDXDNA_INVALID_BO_HANDLE,
+       cmd = kzalloc_obj(*cmd);
+       if (!cmd)
+               return -ENOMEM;
+       kref_init(&cmd->refcnt);
+
+       cmd->opcode = SYNC_DEBUG_BO;
+       ret = amdxdna_cmd_submit(client, cmd, AMDXDNA_INVALID_BO_HANDLE,
                                 &debug_bo_hdl, 1, hwctx->id, &seq);
        if (ret) {
                XDNA_ERR(xdna, "Submit command failed");
-               return ret;
+               goto put_cmd;
        }
aie2_cmd_wait(hwctx, seq);
-       if (cmd.result) {
-               XDNA_ERR(xdna, "Response failure 0x%x", cmd.result);
-               return -EINVAL;
+       if (cmd->result) {
+               XDNA_ERR(xdna, "Response failure 0x%x", cmd->result);
+               ret = -EINVAL;
        }
- return 0;
+put_cmd:
+       aie2_cmd_put(cmd);
+       return ret;
  }
static int aie2_populate_range(struct amdxdna_gem_obj *abo)
@@ -1139,6 +1169,8 @@ int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct 
amdxdna_sched_job *job,
                dma_resv_add_fence(job->bos[i]->resv, job->out_fence, 
DMA_RESV_USAGE_WRITE);
        job->seq = hwctx->priv->seq++;
        kref_get(&job->refcnt);
+       if (job->drv_cmd)
+               kref_get(&job->drv_cmd->refcnt);
        drm_sched_entity_push_job(&job->base);
*seq = job->seq;
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h 
b/drivers/accel/amdxdna/amdxdna_ctx.h
index aaae16430466..b6bef3af7dab 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.h
+++ b/drivers/accel/amdxdna/amdxdna_ctx.h
@@ -132,6 +132,7 @@ enum amdxdna_job_opcode {
  struct amdxdna_drv_cmd {
        enum amdxdna_job_opcode opcode;
        u32                     result;
+       struct kref             refcnt;
  };
struct app_health_report;

Reply via email to