On 5/5/26 11:09, Lizhi Hou wrote:
From: Nishad Saraf <[email protected]>

NPU firmware requires a host-allocated work buffer for hardware contexts.
Allocate a 4 MB host buffer and attach it to the device during device init.

Refactor aie2_alloc_msg_buffer() and aie2_free_msg_buffer() into common
helpers by moving them to aie.c and renaming them to
amdxdna_alloc_msg_buffer() and amdxdna_free_msg_buffer(), allowing both
AIE2 and AIE4 to reuse the implementation.

Signed-off-by: Nishad Saraf <[email protected]>
Signed-off-by: Lizhi Hou <[email protected]>
Reviewed-by: Mario Limonciello (AMD) <[email protected]>
---
  drivers/accel/amdxdna/aie.c             | 34 +++++++++++++++
  drivers/accel/amdxdna/aie.h             |  4 ++
  drivers/accel/amdxdna/aie2_error.c      |  7 ++--
  drivers/accel/amdxdna/aie2_message.c    | 49 +++-------------------
  drivers/accel/amdxdna/aie2_pci.h        |  4 --
  drivers/accel/amdxdna/aie4_message.c    | 18 ++++++++
  drivers/accel/amdxdna/aie4_msg_priv.h   | 14 +++++++
  drivers/accel/amdxdna/aie4_pci.c        | 55 ++++++++++++++++++++++++-
  drivers/accel/amdxdna/aie4_pci.h        |  5 +++
  drivers/accel/amdxdna/amdxdna_pci_drv.c |  3 +-
  10 files changed, 141 insertions(+), 52 deletions(-)

diff --git a/drivers/accel/amdxdna/aie.c b/drivers/accel/amdxdna/aie.c
index a31051cc1ec8..4db2fd80a032 100644
--- a/drivers/accel/amdxdna/aie.c
+++ b/drivers/accel/amdxdna/aie.c
@@ -162,3 +162,37 @@ int amdxdna_get_metadata(struct aie_device *aie,
        kfree(meta);
        return ret;
  }
+
+void *amdxdna_alloc_msg_buffer(struct amdxdna_dev *xdna, u32 *size,
+                              dma_addr_t *dma_addr)
+{
+       void *vaddr;
+       int order;
+
+       *size = max_t(u32, *size, SZ_8K);
+       order = get_order(*size);
+       if (order > MAX_PAGE_ORDER)
+               return ERR_PTR(-EINVAL);
+       *size = PAGE_SIZE << order;
+
+       if (amdxdna_iova_on(xdna))
+               return amdxdna_iommu_alloc(xdna, *size, dma_addr);
+
+       vaddr = dma_alloc_noncoherent(xdna->ddev.dev, *size, dma_addr,
+                                     DMA_FROM_DEVICE, GFP_KERNEL);
+       if (!vaddr)
+               return ERR_PTR(-ENOMEM);
+
+       return vaddr;
+}
+
+void amdxdna_free_msg_buffer(struct amdxdna_dev *xdna, size_t size,
+                            void *cpu_addr, dma_addr_t dma_addr)
+{
+       if (amdxdna_iova_on(xdna)) {
+               amdxdna_iommu_free(xdna, size, cpu_addr, dma_addr);
+               return;
+       }
+
+       dma_free_noncoherent(xdna->ddev.dev, size, cpu_addr, dma_addr, DMA_FROM_DEVICE);
+}
diff --git a/drivers/accel/amdxdna/aie.h b/drivers/accel/amdxdna/aie.h
index 4bb3719ee0c0..70618204c0ab 100644
--- a/drivers/accel/amdxdna/aie.h
+++ b/drivers/accel/amdxdna/aie.h
@@ -121,6 +121,10 @@ int aie_check_protocol(struct aie_device *aie, u32 
fw_major, u32 fw_minor);
  void amdxdna_vbnv_init(struct amdxdna_dev *xdna);
  int amdxdna_get_metadata(struct aie_device *aie, struct amdxdna_client 
*client,
                         struct amdxdna_drm_get_info *args);
+void *amdxdna_alloc_msg_buffer(struct amdxdna_dev *xdna, u32 *size,
+                              dma_addr_t *dma_addr);
+void amdxdna_free_msg_buffer(struct amdxdna_dev *xdna, size_t size,
+                            void *cpu_addr, dma_addr_t dma_addr);
/* aie_psp.c */
  struct psp_device *aiem_psp_create(struct drm_device *ddev, struct psp_config 
*conf);
diff --git a/drivers/accel/amdxdna/aie2_error.c b/drivers/accel/amdxdna/aie2_error.c
index 70007b4363cd..babdac0157ab 100644
--- a/drivers/accel/amdxdna/aie2_error.c
+++ b/drivers/accel/amdxdna/aie2_error.c
@@ -11,6 +11,7 @@
  #include <linux/kthread.h>
  #include <linux/kernel.h>
+#include "aie.h"
  #include "aie2_msg_priv.h"
  #include "aie2_pci.h"
  #include "amdxdna_error.h"
@@ -338,7 +339,7 @@ void aie2_error_async_events_free(struct amdxdna_dev_hdl 
*ndev)
        destroy_workqueue(events->wq);
        mutex_lock(&xdna->dev_lock);
- aie2_free_msg_buffer(ndev, events->size, events->buf, events->addr);
+       amdxdna_free_msg_buffer(xdna, events->size, events->buf, events->addr);
        kfree(events);
  }
@@ -354,7 +355,7 @@ int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev)
        if (!events)
                return -ENOMEM;
- events->buf = aie2_alloc_msg_buffer(ndev, &total_size, &events->addr);
+       events->buf = amdxdna_alloc_msg_buffer(xdna, &total_size, &events->addr);
        if (IS_ERR(events->buf)) {
                ret = PTR_ERR(events->buf);
                goto free_events;
@@ -394,7 +395,7 @@ int aie2_error_async_events_alloc(struct amdxdna_dev_hdl 
*ndev)
  free_wq:
        destroy_workqueue(events->wq);
  free_buf:
-       aie2_free_msg_buffer(ndev, events->size, events->buf, events->addr);
+       amdxdna_free_msg_buffer(xdna, events->size, events->buf, events->addr);
  free_events:
        kfree(events);
        return ret;
diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
index f555ffecea6f..0417c6a4c80a 100644
--- a/drivers/accel/amdxdna/aie2_message.c
+++ b/drivers/accel/amdxdna/aie2_message.c
@@ -27,43 +27,6 @@
  #define EXEC_MSG_OPS(xdna) ((xdna)->dev_handle->exec_msg_ops)

-void *aie2_alloc_msg_buffer(struct amdxdna_dev_hdl *ndev, u32 *size,
-                           dma_addr_t *dma_addr)
-{
-       struct amdxdna_dev *xdna = ndev->aie.xdna;
-       void *vaddr;
-       int order;
-
-       *size = max(*size, SZ_8K);
-       order = get_order(*size);
-       if (order > MAX_PAGE_ORDER)
-               return ERR_PTR(-EINVAL);
-       *size = PAGE_SIZE << order;
-
-       if (amdxdna_iova_on(xdna))
-               return amdxdna_iommu_alloc(xdna, *size, dma_addr);
-
-       vaddr = dma_alloc_noncoherent(xdna->ddev.dev, *size, dma_addr,
-                                     DMA_FROM_DEVICE, GFP_KERNEL);
-       if (!vaddr)
-               return ERR_PTR(-ENOMEM);
-
-       return vaddr;
-}
-
-void aie2_free_msg_buffer(struct amdxdna_dev_hdl *ndev, size_t size,
-                         void *cpu_addr, dma_addr_t dma_addr)
-{
-       struct amdxdna_dev *xdna = ndev->aie.xdna;
-
-       if (amdxdna_iova_on(xdna)) {
-               amdxdna_iommu_free(xdna, size, cpu_addr, dma_addr);
-               return;
-       }
-
-       dma_free_noncoherent(xdna->ddev.dev, size, cpu_addr, dma_addr, DMA_FROM_DEVICE);
-}
-
  int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev)
  {
        DECLARE_AIE_MSG(suspend, MSG_OP_SUSPEND);
@@ -376,7 +339,7 @@ int aie2_query_status(struct amdxdna_dev_hdl *ndev, char 
__user *buf,
        int ret;
buf_sz = ndev->aie.metadata.cols * ndev->aie.metadata.size;
-       buff_addr = aie2_alloc_msg_buffer(ndev, &buf_sz, &dma_addr);
+       buff_addr = amdxdna_alloc_msg_buffer(xdna, &buf_sz, &dma_addr);
        if (IS_ERR(buff_addr))
                return PTR_ERR(buff_addr);
@@ -415,7 +378,7 @@ int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf,
        *cols_filled = aie_bitmap;
fail:
-       aie2_free_msg_buffer(ndev, buf_sz, buff_addr, dma_addr);
+       amdxdna_free_msg_buffer(xdna, buf_sz, buff_addr, dma_addr);
        return ret;
  }
@@ -434,7 +397,7 @@ int aie2_query_telemetry(struct amdxdna_dev_hdl *ndev,
                return -EINVAL;
buf_sz = min(size, SZ_4M);
-       addr = aie2_alloc_msg_buffer(ndev, &buf_sz, &dma_addr);
+       addr = amdxdna_alloc_msg_buffer(xdna, &buf_sz, &dma_addr);
        if (IS_ERR(addr))
                return PTR_ERR(addr);
@@ -466,7 +429,7 @@ int aie2_query_telemetry(struct amdxdna_dev_hdl *ndev,
        header->minor = resp.minor;
free_buf:
-       aie2_free_msg_buffer(ndev, buf_sz, addr, dma_addr);
+       amdxdna_free_msg_buffer(xdna, buf_sz, addr, dma_addr);
        return ret;
  }
@@ -1176,7 +1139,7 @@ int aie2_query_app_health(struct amdxdna_dev_hdl *ndev, u32 context_id,
        }
buf_size = sizeof(*report);
-       buf = aie2_alloc_msg_buffer(ndev, &buf_size, &dma_addr);
+       buf = amdxdna_alloc_msg_buffer(xdna, &buf_size, &dma_addr);
        if (IS_ERR(buf)) {
                XDNA_ERR(xdna, "Failed to allocate buffer for app health");
                return PTR_ERR(buf);
@@ -1197,7 +1160,7 @@ int aie2_query_app_health(struct amdxdna_dev_hdl *ndev, 
u32 context_id,
        memcpy(report, buf, sizeof(*report));
free_buf:
-       aie2_free_msg_buffer(ndev, buf_size, buf, dma_addr);
+       amdxdna_free_msg_buffer(xdna, buf_size, buf, dma_addr);
        return ret;
  }
diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
index c884fed610f9..33b6c84e8b6e 100644
--- a/drivers/accel/amdxdna/aie2_pci.h
+++ b/drivers/accel/amdxdna/aie2_pci.h
@@ -290,10 +290,6 @@ int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct 
amdxdna_sched_job *job,
  int aie2_config_debug_bo(struct amdxdna_hwctx *hwctx, struct 
amdxdna_sched_job *job,
                         int (*notify_cb)(void *, void __iomem *, size_t));
  int aie2_update_prop_time_quota(struct amdxdna_dev_hdl *ndev, u32 us);
-void *aie2_alloc_msg_buffer(struct amdxdna_dev_hdl *ndev, u32 *size,
-                           dma_addr_t *dma_addr);
-void aie2_free_msg_buffer(struct amdxdna_dev_hdl *ndev, size_t size,
-                         void *cpu_addr, dma_addr_t dma_addr);
/* aie2_hwctx.c */
  int aie2_hwctx_init(struct amdxdna_hwctx *hwctx);
diff --git a/drivers/accel/amdxdna/aie4_message.c b/drivers/accel/amdxdna/aie4_message.c
index ac89a9a842b2..d85df04c5f6b 100644
--- a/drivers/accel/amdxdna/aie4_message.c
+++ b/drivers/accel/amdxdna/aie4_message.c
@@ -62,3 +62,21 @@ int aie4_query_aie_metadata(struct amdxdna_dev_hdl *ndev, 
struct aie_metadata *m
return 0;
  }
+
+int aie4_attach_work_buffer(struct amdxdna_dev_hdl *ndev)
+{
+       DECLARE_AIE_MSG(aie4_msg_attach_work_buffer, AIE4_MSG_OP_ATTACH_WORK_BUFFER);
+       struct amdxdna_dev *xdna = ndev->aie.xdna;
+       int ret;
+
+       req.buff_addr = ndev->work_buf_addr;
+       req.buff_size = AIE4_WORK_BUFFER_MIN_SIZE;
+
+       ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg);
+       if (ret)
+               XDNA_ERR(xdna, "Failed to attach work buffer, ret %d", ret);
+       else
+               XDNA_DBG(xdna, "Attached work buffer");
+
+       return ret;
+}
diff --git a/drivers/accel/amdxdna/aie4_msg_priv.h b/drivers/accel/amdxdna/aie4_msg_priv.h
index 69e220e40900..af0866045b91 100644
--- a/drivers/accel/amdxdna/aie4_msg_priv.h
+++ b/drivers/accel/amdxdna/aie4_msg_priv.h
@@ -6,10 +6,12 @@
  #ifndef _AIE4_MSG_PRIV_H_
  #define _AIE4_MSG_PRIV_H_
+#include <linux/sizes.h>
  #include <linux/types.h>
enum aie4_msg_opcode {
        AIE4_MSG_OP_SUSPEND                          = 0x10003,
+       AIE4_MSG_OP_ATTACH_WORK_BUFFER               = 0x1000D,
AIE4_MSG_OP_CREATE_VFS = 0x20001,
        AIE4_MSG_OP_DESTROY_VFS                      = 0x20002,
@@ -130,4 +132,16 @@ struct aie4_msg_aie4_tile_info_resp {
        struct aie4_tile_info info;
  } __packed;
+#define AIE4_WORK_BUFFER_MIN_SIZE SZ_4M
+
+struct aie4_msg_attach_work_buffer_req {
+       __u64 buff_addr;
+       __u32 reserved;
+       __u32 buff_size;
+} __packed;
+
+struct aie4_msg_attach_work_buffer_resp {
+       enum aie4_msg_status status;
+} __packed;
+
  #endif /* _AIE4_MSG_PRIV_H_ */
diff --git a/drivers/accel/amdxdna/aie4_pci.c b/drivers/accel/amdxdna/aie4_pci.c
index 8b5eff0e45c1..a58a83af42a4 100644
--- a/drivers/accel/amdxdna/aie4_pci.c
+++ b/drivers/accel/amdxdna/aie4_pci.c
@@ -286,8 +286,14 @@ static int aie4_pf_hw_start(struct amdxdna_dev_hdl *ndev)
        if (ret)
                goto stop_fw;
+ ret = aie4_attach_work_buffer(ndev);
+       if (ret)
+               goto mbox_fini;
+
        return 0;
+mbox_fini:
+       aie4_mailbox_fini(ndev);
  stop_fw:
        aie4_fw_stop(ndev);
@@ -564,6 +570,40 @@ static int aie4_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i
        return ret;
  }
+static int aie4_alloc_work_buffer(struct amdxdna_dev_hdl *ndev)
+{
+       struct amdxdna_dev *xdna = ndev->aie.xdna;
+       u32 buf_size = AIE4_WORK_BUFFER_MIN_SIZE;
+
+       ndev->work_buf = amdxdna_alloc_msg_buffer(xdna, &buf_size,
+                                                 &ndev->work_buf_addr);
+       if (IS_ERR(ndev->work_buf)) {
+               int ret = PTR_ERR(ndev->work_buf);
+
+               XDNA_ERR(xdna, "Failed to alloc work buffer, size 0x%x",
+                        AIE4_WORK_BUFFER_MIN_SIZE);
+               ndev->work_buf = NULL;
+               return ret;
+       }
+
+       ndev->work_buf_size = buf_size;
+       XDNA_DBG(xdna, "Work buffer allocated: size 0x%x", buf_size);
+
+       return 0;
+}
+
+static void aie4_free_work_buffer(struct amdxdna_dev_hdl *ndev)
+{
+       struct amdxdna_dev *xdna = ndev->aie.xdna;
+
+       if (!ndev->work_buf)
+               return;
+
+       amdxdna_free_msg_buffer(xdna, ndev->work_buf_size, ndev->work_buf,
+                               ndev->work_buf_addr);
+       ndev->work_buf = NULL;
+}
+
  static int aie4_pf_init(struct amdxdna_dev *xdna)
  {
        int ret;
@@ -572,7 +612,19 @@ static int aie4_pf_init(struct amdxdna_dev *xdna)
        if (ret)
                return ret;
- return aie4_pf_hw_start(xdna->dev_handle);
+       ret = aie4_alloc_work_buffer(xdna->dev_handle);
+       if (ret)
+               return ret;
+
+       ret = aie4_pf_hw_start(xdna->dev_handle);
+       if (ret)
+               goto free_work_buf;
+
+       return 0;
+
+free_work_buf:
+       aie4_free_work_buffer(xdna->dev_handle);
+       return ret;
  }
static int aie4_vf_init(struct amdxdna_dev *xdna)
@@ -590,6 +642,7 @@ static void aie4_pf_fini(struct amdxdna_dev *xdna)
  {
        aie4_sriov_stop(xdna->dev_handle);
        aie4_pf_hw_stop(xdna->dev_handle);
+       aie4_free_work_buffer(xdna->dev_handle);
  }
static void aie4_vf_fini(struct amdxdna_dev *xdna)
diff --git a/drivers/accel/amdxdna/aie4_pci.h b/drivers/accel/amdxdna/aie4_pci.h
index 1886cffc62db..390864876ca5 100644
--- a/drivers/accel/amdxdna/aie4_pci.h
+++ b/drivers/accel/amdxdna/aie4_pci.h
@@ -53,11 +53,16 @@ struct amdxdna_dev_hdl {
struct xarray cert_comp_xa; /* device level indexed by msix id */
        struct mutex                    cert_comp_lock; /* protects cert_comp 
operations*/
+
+       void                            *work_buf;
+       dma_addr_t                      work_buf_addr;
+       u32                             work_buf_size;
  };
/* aie4_message.c */
  int aie4_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata 
*metadata);
  int aie4_suspend_fw(struct amdxdna_dev_hdl *ndev);
+int aie4_attach_work_buffer(struct amdxdna_dev_hdl *ndev);
/* aie4_ctx.c */
  int aie4_hwctx_init(struct amdxdna_hwctx *hwctx);
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
index c0d00db25cde..a6e9be7960c2 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
@@ -40,9 +40,10 @@ MODULE_FIRMWARE("amdnpu/17f0_11/npu_7.sbin");
   * 0.7: Support getting power and utilization data
   * 0.8: Support BO usage query
   * 0.9: Add new device type AMDXDNA_DEV_TYPE_PF
+ * 0.10: Support AIE4 UMQ
   */
  #define AMDXDNA_DRIVER_MAJOR          0
-#define AMDXDNA_DRIVER_MINOR           9
+#define AMDXDNA_DRIVER_MINOR           10
/*
   * Bind the driver base on (vendor_id, device_id) pair and later use the

Reply via email to