Applied to drm-misc-fixes

On 7/1/26 14:28, Mario Limonciello wrote:


On 7/1/26 10:55, Lizhi Hou wrote:
When a debug BO command completes, job->drv_cmd may already have been
freed. Accessing it from aie2_sched_drvcmd_resp_handler() can result in
a use-after-free and memory corruption.

Fix this by introducing reference counting for drv_cmd objects and
transferring ownership to the job while it is in flight. This ensures
that the command remains valid until the completion handler finishes
processing it.

Fixes: 7ea046838021 ("accel/amdxdna: Support firmware debug buffer")
Signed-off-by: Lizhi Hou <[email protected]>
Reviewed-by: Mario Limonciello (AMD) <[email protected]>
---
  drivers/accel/amdxdna/aie2_ctx.c    | 68 +++++++++++++++++++++--------
  drivers/accel/amdxdna/amdxdna_ctx.h |  1 +
  2 files changed, 51 insertions(+), 18 deletions(-)

diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
index 55eb29dece5b..8ec8a4d69b14 100644
--- a/drivers/accel/amdxdna/aie2_ctx.c
+++ b/drivers/accel/amdxdna/aie2_ctx.c
@@ -59,6 +59,18 @@ static bool aie2_tdr_detect(struct amdxdna_dev *xdna)
      return false;
  }
  +static void aie2_cmd_release(struct kref *ref)
+{
+    struct amdxdna_drv_cmd *drv_cmd = container_of(ref, struct amdxdna_drv_cmd, refcnt);
+
+    kfree(drv_cmd);
+}
+
+static void aie2_cmd_put(struct amdxdna_drv_cmd *drv_cmd)
+{
+    kref_put(&drv_cmd->refcnt, aie2_cmd_release);
+}
+
  static void aie2_job_release(struct kref *ref)
  {
      struct amdxdna_sched_job *job;
@@ -70,6 +82,8 @@ static void aie2_job_release(struct kref *ref)
      wake_up(&job->hwctx->priv->job_free_wq);
      if (job->out_fence)
          dma_fence_put(job->out_fence);
+    if (job->drv_cmd)
+        aie2_cmd_put(job->drv_cmd);
      kfree(job->aie2_job_health);
      kfree(job);
  }
@@ -901,7 +915,7 @@ static int aie2_hwctx_cfg_debug_bo(struct amdxdna_hwctx *hwctx, u32 bo_hdl,
  {
      struct amdxdna_client *client = hwctx->client;
      struct amdxdna_dev *xdna = client->xdna;
-    struct amdxdna_drv_cmd cmd = { 0 };
+    struct amdxdna_drv_cmd *cmd;
      struct amdxdna_gem_obj *abo;
      u64 seq;
      int ret;
@@ -912,32 +926,39 @@ static int aie2_hwctx_cfg_debug_bo(struct amdxdna_hwctx *hwctx, u32 bo_hdl,
          return -EINVAL;
      }
  +    cmd = kzalloc_obj(*cmd);
+    if (!cmd) {
+        ret = -ENOMEM;
+        goto put_obj;
+    }
+    kref_init(&cmd->refcnt);
+
      if (attach) {
          if (abo->assigned_hwctx != AMDXDNA_INVALID_CTX_HANDLE) {
              ret = -EBUSY;
-            goto put_obj;
+            goto put_cmd;
          }
-        cmd.opcode = ATTACH_DEBUG_BO;
+        cmd->opcode = ATTACH_DEBUG_BO;
      } else {
          if (abo->assigned_hwctx != hwctx->id) {
              ret = -EINVAL;
-            goto put_obj;
+            goto put_cmd;
          }
-        cmd.opcode = DETACH_DEBUG_BO;
+        cmd->opcode = DETACH_DEBUG_BO;
      }
  -    ret = amdxdna_cmd_submit(client, &cmd, AMDXDNA_INVALID_BO_HANDLE,
+    ret = amdxdna_cmd_submit(client, cmd, AMDXDNA_INVALID_BO_HANDLE,
                   &bo_hdl, 1, hwctx->id, &seq);
      if (ret) {
          XDNA_ERR(xdna, "Submit command failed");
-        goto put_obj;
+        goto put_cmd;
      }
        aie2_cmd_wait(hwctx, seq);
-    if (cmd.result) {
-        XDNA_ERR(xdna, "Response failure 0x%x", cmd.result);
+    if (cmd->result) {
+        XDNA_ERR(xdna, "Response failure 0x%x", cmd->result);
          ret = -EINVAL;
-        goto put_obj;
+        goto put_cmd;
      }
        if (attach)
@@ -947,6 +968,8 @@ static int aie2_hwctx_cfg_debug_bo(struct amdxdna_hwctx *hwctx, u32 bo_hdl,
        XDNA_DBG(xdna, "Config debug BO %d to %s", bo_hdl, hwctx->name);
  +put_cmd:
+    aie2_cmd_put(cmd);
  put_obj:
      amdxdna_gem_put_obj(abo);
      return ret;
@@ -974,25 +997,32 @@ int aie2_hwctx_sync_debug_bo(struct amdxdna_hwctx *hwctx, u32 debug_bo_hdl)
  {
      struct amdxdna_client *client = hwctx->client;
      struct amdxdna_dev *xdna = client->xdna;
-    struct amdxdna_drv_cmd cmd = { 0 };
+    struct amdxdna_drv_cmd *cmd;
      u64 seq;
      int ret;
  -    cmd.opcode = SYNC_DEBUG_BO;
-    ret = amdxdna_cmd_submit(client, &cmd, AMDXDNA_INVALID_BO_HANDLE,
+    cmd = kzalloc_obj(*cmd);
+    if (!cmd)
+        return -ENOMEM;
+    kref_init(&cmd->refcnt);
+
+    cmd->opcode = SYNC_DEBUG_BO;
+    ret = amdxdna_cmd_submit(client, cmd, AMDXDNA_INVALID_BO_HANDLE,
                   &debug_bo_hdl, 1, hwctx->id, &seq);
      if (ret) {
          XDNA_ERR(xdna, "Submit command failed");
-        return ret;
+        goto put_cmd;
      }
        aie2_cmd_wait(hwctx, seq);
-    if (cmd.result) {
-        XDNA_ERR(xdna, "Response failure 0x%x", cmd.result);
-        return -EINVAL;
+    if (cmd->result) {
+        XDNA_ERR(xdna, "Response failure 0x%x", cmd->result);
+        ret = -EINVAL;
      }
  -    return 0;
+put_cmd:
+    aie2_cmd_put(cmd);
+    return ret;
  }
    static int aie2_populate_range(struct amdxdna_gem_obj *abo)
@@ -1139,6 +1169,8 @@ int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
          dma_resv_add_fence(job->bos[i]->resv, job->out_fence, DMA_RESV_USAGE_WRITE);
      job->seq = hwctx->priv->seq++;
      kref_get(&job->refcnt);
+    if (job->drv_cmd)
+        kref_get(&job->drv_cmd->refcnt);
      drm_sched_entity_push_job(&job->base);
        *seq = job->seq;
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h
index aaae16430466..b6bef3af7dab 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.h
+++ b/drivers/accel/amdxdna/amdxdna_ctx.h
@@ -132,6 +132,7 @@ enum amdxdna_job_opcode {
  struct amdxdna_drv_cmd {
      enum amdxdna_job_opcode    opcode;
      u32            result;
+    struct kref        refcnt;
  };
    struct app_health_report;

Reply via email to