Introduce an expandable device heap to avoid allocating a large heap
upfront. Start with a smaller initial heap and grow it on demand.
Return -EAGAIN when a dev BO allocation fails due to insufficient heap
space, so that userspace can expand the heap through the heap BO creation
ioctl and then retry the allocation.

Manage the heap chunks of each client in an xarray. On expansion,
register the new chunks with the firmware for every hardware context of
that client via MSG_OP_ADD_HOST_BUFFER.

Since the firmware does not support heap shrinking, heap chunks are not
removed at runtime; release all of them on device close.

Co-developed-by: Wendy Liang <[email protected]>
Signed-off-by: Wendy Liang <[email protected]>
Signed-off-by: Lizhi Hou <[email protected]>
---
 drivers/accel/amdxdna/aie2_ctx.c        |  45 +++-
 drivers/accel/amdxdna/aie2_message.c    |  52 +++-
 drivers/accel/amdxdna/aie2_msg_priv.h   |   1 +
 drivers/accel/amdxdna/aie2_pci.c        |   1 +
 drivers/accel/amdxdna/aie2_pci.h        |   8 +-
 drivers/accel/amdxdna/amdxdna_ctx.c     |  83 ++++++-
 drivers/accel/amdxdna/amdxdna_ctx.h     |   2 +
 drivers/accel/amdxdna/amdxdna_gem.c     | 308 ++++++++++++++++++------
 drivers/accel/amdxdna/amdxdna_gem.h     |  16 +-
 drivers/accel/amdxdna/amdxdna_pci_drv.c |  12 +-
 drivers/accel/amdxdna/amdxdna_pci_drv.h |   8 +-
 drivers/accel/amdxdna/npu1_regs.c       |   1 +
 drivers/accel/amdxdna/npu4_regs.c       |   2 +
 drivers/accel/amdxdna/npu5_regs.c       |   1 +
 drivers/accel/amdxdna/npu6_regs.c       |   1 +
 15 files changed, 446 insertions(+), 95 deletions(-)

diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
index 7d6094aefb6f..658a5fb1fda6 100644
--- a/drivers/accel/amdxdna/aie2_ctx.c
+++ b/drivers/accel/amdxdna/aie2_ctx.c
@@ -91,6 +91,7 @@ static void aie2_hwctx_stop(struct amdxdna_dev *xdna, struct 
amdxdna_hwctx *hwct
 static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx 
*hwctx)
 {
        struct amdxdna_gem_obj *heap = hwctx->priv->heap;
+       unsigned long heap_id;
        int ret;
 
        ret = aie2_create_context(xdna->dev_handle, hwctx);
@@ -107,6 +108,17 @@ static int aie2_hwctx_restart(struct amdxdna_dev *xdna, 
struct amdxdna_hwctx *hw
                goto out;
        }
 
+       xa_for_each_range(&hwctx->client->dev_heap_xa, heap_id, heap, 1,
+                         hwctx->last_attached_heap) {
+               ret = aie2_add_host_buf(xdna->dev_handle, hwctx->fw_ctx_id,
+                                       amdxdna_obj_dma_addr(heap),
+                                       heap->mem.size);
+               if (ret) {
+                       XDNA_ERR(xdna, "Add heap %ld failed ret %d", heap_id, 
ret);
+                       goto out;
+               }
+       }
+
        ret = aie2_config_cu(hwctx, NULL);
        if (ret) {
                XDNA_ERR(xdna, "Config cu failed, ret %d", ret);
@@ -650,7 +662,7 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
        hwctx->priv = priv;
 
        mutex_lock(&client->mm_lock);
-       heap = client->dev_heap;
+       heap = xa_load(&client->dev_heap_xa, 0);
        if (!heap) {
                XDNA_ERR(xdna, "The client dev heap object not exist");
                mutex_unlock(&client->mm_lock);
@@ -732,6 +744,12 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
                goto release_resource;
        }
 
+       ret = amdxdna_update_heap(client, hwctx);
+       if (ret) {
+               XDNA_ERR(xdna, "Update heap failed, ret %d", ret);
+               goto release_resource;
+       }
+
        ret = aie2_ctx_syncobj_create(hwctx);
        if (ret) {
                XDNA_ERR(xdna, "Create syncobj failed, ret %d", ret);
@@ -1161,3 +1179,28 @@ void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo,
        else if (ret == -ERESTARTSYS)
                XDNA_DBG(xdna, "Wait for bo interrupted by signal");
 }
+
+int aie2_hwctx_heap_expand(struct amdxdna_hwctx *hwctx,
+                          struct amdxdna_gem_obj *heap)
+{
+       struct amdxdna_client *client = hwctx->client;
+       struct amdxdna_dev *xdna = client->xdna;
+       u64 addr;
+       int ret;
+
+       ret = amdxdna_pm_resume_get_locked(xdna);
+       if (ret)
+               return ret;
+
+       addr = amdxdna_obj_dma_addr(heap);
+       ret = aie2_add_host_buf(xdna->dev_handle, hwctx->fw_ctx_id,
+                               addr, heap->mem.size);
+       if (ret) {
+               XDNA_ERR(xdna, "Add heap failed hwctx %s 0x%lx ret %d",
+                        hwctx->name, heap->mem.size, ret);
+       }
+
+       amdxdna_pm_suspend_put(xdna);
+
+       return ret;
+}
diff --git a/drivers/accel/amdxdna/aie2_message.c 
b/drivers/accel/amdxdna/aie2_message.c
index 48aac0c570fa..0140b5f0e467 100644
--- a/drivers/accel/amdxdna/aie2_message.c
+++ b/drivers/accel/amdxdna/aie2_message.c
@@ -301,25 +301,59 @@ int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, 
struct amdxdna_hwctx *hwc
        return ret;
 }
 
-int aie2_map_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, 
u64 size)
+static int aie2_send_host_buf_msgs(struct amdxdna_dev_hdl *ndev, u32 
context_id,
+                                  u64 addr, u64 size, u32 initial_opcode)
 {
        DECLARE_AIE_MSG(map_host_buffer, MSG_OP_MAP_HOST_BUFFER);
        struct amdxdna_dev *xdna = ndev->aie.xdna;
+       size_t chunk_size;
        int ret;
 
-       req.context_id = context_id;
-       req.buf_addr = addr;
-       req.buf_size = size;
-       ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg);
-       if (ret)
-               return ret;
+       chunk_size = xdna->dev_info->dev_mem_size;
+       if (!size || !IS_ALIGNED(size, chunk_size)) {
+               XDNA_ERR(xdna, "Invalid size 0x%llx for chunk 0x%lx",
+                        size, chunk_size);
+               return -EINVAL;
+       }
 
-       XDNA_DBG(xdna, "fw ctx %d map host buf addr 0x%llx size 0x%llx",
-                context_id, addr, size);
+       msg.opcode = initial_opcode;
+       do {
+               req.context_id = context_id;
+               req.buf_addr = addr;
+               req.buf_size = chunk_size;
+               ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg);
+               if (ret) {
+                       XDNA_ERR(xdna, "fw ctx %d addr 0x%llx size 0x%lx",
+                                context_id, addr, chunk_size);
+                       return ret;
+               }
+
+               XDNA_DBG(xdna, "fw ctx %d host buf op 0x%x addr 0x%llx size 
0x%lx",
+                        context_id, msg.opcode, addr, chunk_size);
+
+               addr += chunk_size;
+               size -= chunk_size;
+               msg.opcode = MSG_OP_ADD_HOST_BUFFER;
+       } while (size);
 
        return 0;
 }
 
+int aie2_map_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, 
u64 size)
+{
+       return aie2_send_host_buf_msgs(ndev, context_id, addr, size,
+                                      MSG_OP_MAP_HOST_BUFFER);
+}
+
+int aie2_add_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, 
u64 size)
+{
+       if (!AIE_FEATURE_ON(&ndev->aie, AIE2_ADD_HOST_BUFFER))
+               return -EOPNOTSUPP;
+
+       return aie2_send_host_buf_msgs(ndev, context_id, addr, size,
+                                      MSG_OP_ADD_HOST_BUFFER);
+}
+
 static int amdxdna_hwctx_col_map(struct amdxdna_hwctx *hwctx, void *arg)
 {
        u32 *bitmap = arg;
diff --git a/drivers/accel/amdxdna/aie2_msg_priv.h 
b/drivers/accel/amdxdna/aie2_msg_priv.h
index a41c9797e265..fd65a4236d49 100644
--- a/drivers/accel/amdxdna/aie2_msg_priv.h
+++ b/drivers/accel/amdxdna/aie2_msg_priv.h
@@ -33,6 +33,7 @@ enum aie2_msg_opcode {
        MSG_OP_REGISTER_ASYNC_EVENT_MSG    = 0x10C,
        MSG_OP_UPDATE_PROPERTY             = 0x113,
        MSG_OP_GET_APP_HEALTH              = 0x114,
+       MSG_OP_ADD_HOST_BUFFER             = 0x115,
        MSG_OP_GET_DEV_REVISION            = 0x117,
        MSG_OP_MAX_DRV_OPCODE,
        MSG_OP_GET_PROTOCOL_VERSION        = 0x301,
diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
index 6c8a0f70b73d..c4d345d4c76b 100644
--- a/drivers/accel/amdxdna/aie2_pci.c
+++ b/drivers/accel/amdxdna/aie2_pci.c
@@ -1241,4 +1241,5 @@ const struct amdxdna_dev_ops aie2_ops = {
        .hmm_invalidate = aie2_hmm_invalidate,
        .get_array = aie2_get_array,
        .get_dev_revision = aie2_get_dev_rev,
+       .hwctx_heap_expand = aie2_hwctx_heap_expand,
 };
diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
index 84bdb3f8b8f9..77648cc548b6 100644
--- a/drivers/accel/amdxdna/aie2_pci.h
+++ b/drivers/accel/amdxdna/aie2_pci.h
@@ -15,8 +15,9 @@
 #include "amdxdna_mailbox.h"
 
 /* Firmware determines device memory base address and size */
-#define AIE2_DEVM_BASE 0x4000000
-#define AIE2_DEVM_SIZE SZ_64M
+#define AIE2_DEVM_BASE         0x4000000
+#define AIE2_DEVM_SIZE         SZ_64M
+#define AIE2_DEVM_MAX_SIZE     SZ_512M
 
 #define NDEV2PDEV(ndev) (to_pci_dev((ndev)->aie.xdna->ddev.dev))
 
@@ -198,6 +199,7 @@ enum aie2_fw_feature {
        AIE2_PREEMPT,
        AIE2_TEMPORAL_ONLY,
        AIE2_APP_HEALTH,
+       AIE2_ADD_HOST_BUFFER,
        AIE2_UPDATE_PROPERTY,
        AIE2_GET_DEV_REVISION,
        AIE2_FEATURE_MAX
@@ -271,6 +273,7 @@ int aie2_get_dev_revision(struct amdxdna_dev_hdl *ndev, 
enum aie2_dev_revision *
 int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx 
*hwctx);
 int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx 
*hwctx);
 int aie2_map_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, 
u64 size);
+int aie2_add_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, 
u64 size);
 int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf, u32 
size, u32 *cols_filled);
 int aie2_query_telemetry(struct amdxdna_dev_hdl *ndev,
                         char __user *buf, u32 size,
@@ -302,5 +305,6 @@ void aie2_hwctx_suspend(struct amdxdna_client *client);
 int aie2_hwctx_resume(struct amdxdna_client *client);
 int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job 
*job, u64 *seq);
 void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo, unsigned long cur_seq);
+int aie2_hwctx_heap_expand(struct amdxdna_hwctx *hwctx, struct amdxdna_gem_obj 
*heap);
 
 #endif /* _AIE2_PCI_H_ */
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c 
b/drivers/accel/amdxdna/amdxdna_ctx.c
index b79229a63af3..ea9e9c2ca318 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.c
+++ b/drivers/accel/amdxdna/amdxdna_ctx.c
@@ -61,16 +61,35 @@ static struct dma_fence *amdxdna_fence_create(struct 
amdxdna_hwctx *hwctx)
        return &fence->base;
 }
 
+static void amdxdna_hwctx_release_expanded_heap(struct amdxdna_hwctx *hwctx)
+{
+       struct amdxdna_client *client = hwctx->client;
+       struct amdxdna_gem_obj *heap;
+       unsigned long heap_id;
+
+       mutex_lock(&client->mm_lock);
+       if (hwctx->last_attached_heap) {
+               xa_for_each_range(&client->dev_heap_xa, heap_id, heap, 1,
+                                 hwctx->last_attached_heap) {
+                       amdxdna_gem_unpin(heap);
+                       drm_gem_object_put(to_gobj(heap));
+               }
+       }
+       mutex_unlock(&client->mm_lock);
+}
+
 static void amdxdna_hwctx_destroy_rcu(struct amdxdna_hwctx *hwctx,
                                      struct srcu_struct *ss)
 {
-       struct amdxdna_dev *xdna = hwctx->client->xdna;
+       struct amdxdna_client *client = hwctx->client;
+       struct amdxdna_dev *xdna = client->xdna;
 
        synchronize_srcu(ss);
 
        /* At this point, user is not able to submit new commands */
        xdna->dev_info->ops->hwctx_fini(hwctx);
 
+       amdxdna_hwctx_release_expanded_heap(hwctx);
        kfree(hwctx->name);
        kfree(hwctx);
 }
@@ -407,6 +426,68 @@ int amdxdna_hwctx_sync_debug_bo(struct amdxdna_client 
*client, u32 debug_bo_hdl)
        return ret;
 }
 
+static int amdxdna_hwctx_expand_heap(struct amdxdna_hwctx *hwctx)
+{
+       struct amdxdna_client *client = hwctx->client;
+       struct amdxdna_dev *xdna = client->xdna;
+       struct amdxdna_gem_obj *heap;
+       unsigned long heap_id, nid;
+       int ret = 0;
+
+       nid = hwctx->last_attached_heap + 1;
+       if (nid == client->dev_heap_nid)
+               goto out;
+
+       if (!xdna->dev_info->ops->hwctx_heap_expand) {
+               ret = -EOPNOTSUPP;
+               goto out;
+       }
+
+       xa_for_each_range(&client->dev_heap_xa, heap_id, heap,
+                         nid, client->dev_heap_nid) {
+               drm_gem_object_get(to_gobj(heap));
+               ret = amdxdna_gem_pin(heap);
+               if (ret) {
+                       drm_gem_object_put(to_gobj(heap));
+                       break;
+               }
+
+               mutex_unlock(&client->mm_lock);
+               ret = xdna->dev_info->ops->hwctx_heap_expand(hwctx, heap);
+               mutex_lock(&client->mm_lock);
+               if (ret) {
+                       amdxdna_gem_unpin(heap);
+                       drm_gem_object_put(to_gobj(heap));
+                       break;
+               }
+
+               hwctx->last_attached_heap = heap_id;
+       }
+
+out:
+       return ret;
+}
+
+int amdxdna_update_heap(struct amdxdna_client *client, struct amdxdna_hwctx 
*hwctx)
+{
+       unsigned long hwctx_id;
+       int ret;
+
+       if (hwctx) {
+               guard(mutex)(&client->mm_lock);
+               return amdxdna_hwctx_expand_heap(hwctx);
+       }
+
+       guard(mutex)(&client->mm_lock);
+       amdxdna_for_each_hwctx(client, hwctx_id, hwctx) {
+               ret = amdxdna_hwctx_expand_heap(hwctx);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
 static void
 amdxdna_arg_bos_put(struct amdxdna_sched_job *job)
 {
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h 
b/drivers/accel/amdxdna/amdxdna_ctx.h
index 6e3c6371a088..aaae16430466 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.h
+++ b/drivers/accel/amdxdna/amdxdna_ctx.h
@@ -109,6 +109,7 @@ struct amdxdna_hwctx {
        u32                             umq_bo_hdl;
        u32                             doorbell_offset;
        u32                             num_unused_col;
+       u32                             last_attached_heap;
 
        struct amdxdna_qos_info              qos;
        struct amdxdna_hwctx_param_config_cu *cus;
@@ -205,6 +206,7 @@ void amdxdna_hwctx_remove_all(struct amdxdna_client 
*client);
 int amdxdna_hwctx_walk(struct amdxdna_client *client, void *arg,
                       int (*walk)(struct amdxdna_hwctx *hwctx, void *arg));
 int amdxdna_hwctx_sync_debug_bo(struct amdxdna_client *client, u32 
debug_bo_hdl);
+int amdxdna_update_heap(struct amdxdna_client *client, struct amdxdna_hwctx 
*hwctx);
 
 int amdxdna_cmd_submit(struct amdxdna_client *client,
                       struct amdxdna_drv_cmd *drv_cmd, u32 cmd_bo_hdls,
diff --git a/drivers/accel/amdxdna/amdxdna_gem.c 
b/drivers/accel/amdxdna/amdxdna_gem.c
index 319d2064fafa..4b362e79505d 100644
--- a/drivers/accel/amdxdna/amdxdna_gem.c
+++ b/drivers/accel/amdxdna/amdxdna_gem.c
@@ -24,6 +24,77 @@
 
 MODULE_IMPORT_NS("DMA_BUF");
 
+/*
+ * The dev BO could be across multiple heap BO chunks. The heap chunks should
+ * be mapped to userspace and the userspace virtual address should be
+ * contiguous.
+ */
+static int
+amdxdna_init_dev_bo(struct amdxdna_gem_obj *dev_bo)
+{
+       struct amdxdna_client *client = dev_bo->client;
+       struct amdxdna_dev *xdna = client->xdna;
+       struct amdxdna_gem_obj *heap;
+       u64 heap_addr, exp_heap_uva;
+       u32 heap_id;
+
+       if (xa_empty(&client->dev_heap_xa)) {
+               XDNA_DBG(xdna, "Empty heap xa");
+               return -EAGAIN;
+       }
+
+       for (heap_id = 0; heap_id < client->dev_heap_nid; heap_id++) {
+               heap = xa_load(&client->dev_heap_xa, heap_id);
+               if (!heap) {
+                       XDNA_ERR(xdna, "Failed to load heap %d", heap_id);
+                       return -EINVAL;
+               }
+               heap_addr = amdxdna_gem_dev_addr(heap);
+               if (heap_addr > dev_bo->mm_node.start)
+                       break;
+       }
+
+       heap_id--;
+       heap = xa_load(&client->dev_heap_xa, heap_id);
+       exp_heap_uva = amdxdna_gem_uva(heap);
+       heap_addr = amdxdna_gem_dev_addr(heap);
+       dev_bo->heap_start_id = heap_id;
+       dev_bo->mem.uva = dev_bo->mm_node.start - heap_addr + exp_heap_uva;
+
+       for (; heap_id < client->dev_heap_nid; heap_id++) {
+               heap = xa_load(&client->dev_heap_xa, heap_id);
+               if (!heap) {
+                       XDNA_ERR(xdna, "Failed to load heap %d", heap_id);
+                       return -EINVAL;
+               }
+               heap_addr = amdxdna_gem_uva(heap);
+               if (heap_addr == AMDXDNA_INVALID_ADDR) {
+                       XDNA_ERR(xdna, "Heap %d is not mapped", heap_id);
+                       return -EAGAIN;
+               }
+
+               if (heap_addr != exp_heap_uva) {
+                       XDNA_ERR(xdna, "Heap %d uva is not contiguous", 
heap_id);
+                       return -EINVAL;
+               }
+
+               if (heap->dev_addr + heap->mem.size >=
+                   dev_bo->mm_node.start + dev_bo->mem.size)
+                       break;
+
+               exp_heap_uva += heap->mem.size;
+       }
+
+       if (heap_id == client->dev_heap_nid) {
+               XDNA_DBG(xdna, "Can not find heap end");
+               return -EAGAIN;
+       }
+
+       dev_bo->heap_end_id = heap_id;
+
+       return 0;
+}
+
 static int
 amdxdna_gem_heap_alloc(struct amdxdna_gem_obj *abo)
 {
@@ -31,32 +102,22 @@ amdxdna_gem_heap_alloc(struct amdxdna_gem_obj *abo)
        struct amdxdna_dev *xdna = client->xdna;
        struct amdxdna_mem *mem = &abo->mem;
        struct amdxdna_gem_obj *heap;
+       unsigned long heap_id;
        u32 align;
        int ret;
 
        mutex_lock(&client->mm_lock);
 
-       heap = client->dev_heap;
-       if (!heap) {
-               ret = -EINVAL;
-               goto unlock_out;
-       }
-
-       if (amdxdna_gem_uva(heap) == AMDXDNA_INVALID_ADDR) {
-               XDNA_ERR(xdna, "Invalid dev heap userptr");
-               ret = -EINVAL;
-               goto unlock_out;
-       }
-
-       if (mem->size == 0 || mem->size > heap->mem.size) {
-               XDNA_ERR(xdna, "Invalid dev bo size 0x%lx, limit 0x%lx",
-                        mem->size, heap->mem.size);
+       if (!mem->size || mem->size > xdna->dev_info->dev_heap_max_size) {
+               XDNA_ERR(xdna, "Invalid dev bo size 0x%lx, max heap 0x%lx",
+                        mem->size, xdna->dev_info->dev_heap_max_size);
                ret = -EINVAL;
                goto unlock_out;
        }
 
        align = 1 << max(PAGE_SHIFT, xdna->dev_info->dev_mem_buf_shift);
-       ret = drm_mm_insert_node_generic(&heap->mm, &abo->mm_node,
+       ret = drm_mm_insert_node_generic(&client->dev_heap_mm,
+                                        &abo->mm_node,
                                         mem->size, align,
                                         0, DRM_MM_INSERT_BEST);
        if (ret) {
@@ -64,9 +125,16 @@ amdxdna_gem_heap_alloc(struct amdxdna_gem_obj *abo)
                goto unlock_out;
        }
 
-       client->heap_usage += mem->size;
+       ret = amdxdna_init_dev_bo(abo);
+       if (ret) {
+               drm_mm_remove_node(&abo->mm_node);
+               goto unlock_out;
+       }
 
-       drm_gem_object_get(to_gobj(heap));
+       client->heap_usage += mem->size;
+       xa_for_each_range(&client->dev_heap_xa, heap_id, heap,
+                         abo->heap_start_id, abo->heap_end_id)
+               drm_gem_object_get(to_gobj(heap));
 
 unlock_out:
        mutex_unlock(&client->mm_lock);
@@ -79,13 +147,16 @@ amdxdna_gem_heap_free(struct amdxdna_gem_obj *abo)
 {
        struct amdxdna_client *client = abo->client;
        struct amdxdna_gem_obj *heap;
+       unsigned long heap_id;
 
        mutex_lock(&client->mm_lock);
 
        drm_mm_remove_node(&abo->mm_node);
        client->heap_usage -= abo->mem.size;
-       heap = client->dev_heap;
-       drm_gem_object_put(to_gobj(heap));
+
+       xa_for_each_range(&client->dev_heap_xa, heap_id, heap,
+                         abo->heap_start_id, abo->heap_end_id)
+               drm_gem_object_put(to_gobj(heap));
 
        mutex_unlock(&client->mm_lock);
 }
@@ -161,31 +232,13 @@ static void amdxdna_gem_vunmap(struct amdxdna_gem_obj 
*abo)
        }
 }
 
-/*
- * Obtain the user virtual address for accessing the BO.
- * It can be used for device to access the BO when PASID is enabled.
- */
-u64 amdxdna_gem_uva(struct amdxdna_gem_obj *abo)
-{
-       if (abo->type == AMDXDNA_BO_DEV) {
-               struct amdxdna_gem_obj *heap = abo->client->dev_heap;
-               u64 off = amdxdna_dev_bo_offset(abo);
-
-               if (amdxdna_gem_uva(heap) != AMDXDNA_INVALID_ADDR)
-                       return amdxdna_gem_uva(heap) + off;
-               return AMDXDNA_INVALID_ADDR;
-       }
-
-       return abo->mem.uva;
-}
-
 /*
  * Obtain the address for device to access the BO.
  */
 u64 amdxdna_gem_dev_addr(struct amdxdna_gem_obj *abo)
 {
        if (abo->type == AMDXDNA_BO_DEV_HEAP)
-               return abo->client->xdna->dev_info->dev_mem_base;
+               return abo->dev_addr;
        if (abo->type == AMDXDNA_BO_DEV)
                return abo->mm_node.start;
        return amdxdna_obj_dma_addr(abo);
@@ -566,9 +619,6 @@ static void amdxdna_gem_obj_free(struct drm_gem_object 
*gobj)
        if (abo->pinned)
                amdxdna_gem_unpin(abo);
 
-       if (abo->type == AMDXDNA_BO_DEV_HEAP)
-               drm_mm_takedown(&abo->mm);
-
        amdxdna_dma_unmap_bo(xdna, abo);
        amdxdna_gem_vunmap(abo);
        mutex_destroy(&abo->lock);
@@ -654,11 +704,23 @@ static void amdxdna_gem_obj_vunmap(struct drm_gem_object 
*obj, struct iosys_map
 static int amdxdna_gem_dev_obj_vmap(struct drm_gem_object *obj, struct 
iosys_map *map)
 {
        struct amdxdna_gem_obj *abo = to_xdna_obj(obj);
-       void *base = amdxdna_gem_vmap(abo->client->dev_heap);
-       u64 offset = amdxdna_dev_bo_offset(abo);
+       struct amdxdna_gem_obj *heap;
+       void *base;
+       u64 offset;
+
+       /* vmap dev bo which is across more than 1 heap is not allowed */
+       if (abo->heap_start_id != abo->heap_end_id)
+               return -ENOMEM;
 
+       heap = xa_load(&abo->client->dev_heap_xa, abo->heap_start_id);
+       if (!heap)
+               return -ENOMEM;
+
+       base = amdxdna_gem_vmap(heap);
        if (!base)
                return -ENOMEM;
+
+       offset = amdxdna_gem_dev_addr(abo) - amdxdna_gem_dev_addr(heap);
        iosys_map_set_vaddr(map, base + offset);
        return 0;
 }
@@ -873,15 +935,25 @@ amdxdna_drm_create_dev_heap_bo(struct drm_device *dev,
        /* Set up heap for this client. */
        mutex_lock(&client->mm_lock);
 
-       if (client->dev_heap) {
-               XDNA_DBG(client->xdna, "dev heap is already created");
-               ret = -EBUSY;
+       if (client->total_heap_size + abo->mem.size >
+           xdna->dev_info->dev_heap_max_size) {
+               XDNA_ERR(xdna, "Heap size 0x%lx + 0x%lx exceeds max 0x%lx",
+                        client->total_heap_size, abo->mem.size,
+                        xdna->dev_info->dev_heap_max_size);
+               ret = -ENOSPC;
                goto mm_unlock;
        }
-       client->dev_heap = abo;
-       drm_gem_object_get(to_gobj(abo));
 
-       drm_mm_init(&abo->mm, xdna->dev_info->dev_mem_base, abo->mem.size);
+       ret = xa_insert(&client->dev_heap_xa, client->dev_heap_nid, abo, 
GFP_KERNEL);
+       if (ret) {
+               XDNA_ERR(xdna, "Add heap failed %d", ret);
+               goto mm_unlock;
+       }
+
+       abo->dev_addr = xdna->dev_info->dev_mem_base + client->total_heap_size;
+       client->total_heap_size += abo->mem.size;
+       client->dev_heap_nid++;
+       drm_gem_object_get(to_gobj(abo));
 
        mutex_unlock(&client->mm_lock);
 
@@ -924,10 +996,10 @@ amdxdna_drm_create_dev_bo(struct drm_device *dev,
 
        ret = amdxdna_gem_heap_alloc(abo);
        if (ret) {
-               XDNA_ERR(xdna, "Failed to alloc dev bo memory, ret %d", ret);
                amdxdna_gem_destroy_obj(abo);
                return ERR_PTR(ret);
        }
+
        drm_gem_private_object_init(dev, gobj, aligned_sz);
 
        return abo;
@@ -935,6 +1007,7 @@ amdxdna_drm_create_dev_bo(struct drm_device *dev,
 
 int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, void *data, struct 
drm_file *filp)
 {
+       struct amdxdna_client *client = filp->driver_priv;
        struct amdxdna_dev *xdna = to_xdna_dev(dev);
        struct amdxdna_drm_create_bo *args = data;
        struct amdxdna_gem_obj *abo;
@@ -955,6 +1028,13 @@ int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, 
void *data, struct drm_f
                break;
        case AMDXDNA_BO_DEV:
                abo = amdxdna_drm_create_dev_bo(dev, args, filp);
+               if (!IS_ERR(abo)) {
+                       mutex_lock(&xdna->dev_lock);
+                       ret = amdxdna_update_heap(client, NULL);
+                       mutex_unlock(&xdna->dev_lock);
+                       if (ret)
+                               goto put_obj;
+               }
                break;
        default:
                return -EINVAL;
@@ -978,14 +1058,11 @@ int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, 
void *data, struct drm_f
        return ret;
 }
 
-int amdxdna_gem_pin_nolock(struct amdxdna_gem_obj *abo)
+static int amdxdna_bo_pin(struct amdxdna_gem_obj *abo)
 {
        struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
        int ret;
 
-       if (abo->type == AMDXDNA_BO_DEV)
-               abo = abo->client->dev_heap;
-
        if (is_import_bo(abo))
                return 0;
 
@@ -995,6 +1072,45 @@ int amdxdna_gem_pin_nolock(struct amdxdna_gem_obj *abo)
        return ret;
 }
 
+static void amdxdna_bo_unpin(struct amdxdna_gem_obj *abo)
+{
+       struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
+
+       if (is_import_bo(abo))
+               return;
+
+       drm_gem_shmem_unpin(&abo->base);
+
+       XDNA_DBG(xdna, "BO type %d", abo->type);
+}
+
+int amdxdna_gem_pin_nolock(struct amdxdna_gem_obj *abo)
+{
+       struct amdxdna_client *client = abo->client;
+       struct amdxdna_gem_obj *heap;
+       unsigned long heap_id, last = ULONG_MAX;
+       int ret = 0;
+
+       if (abo->type != AMDXDNA_BO_DEV)
+               return amdxdna_bo_pin(abo);
+
+       xa_for_each_range(&client->dev_heap_xa, heap_id, heap,
+                         abo->heap_start_id, abo->heap_end_id) {
+               ret = amdxdna_bo_pin(heap);
+               if (ret)
+                       break;
+               last = heap_id;
+       }
+
+       if (ret && last <= abo->heap_end_id) {
+               xa_for_each_range(&client->dev_heap_xa, heap_id, heap,
+                                 abo->heap_start_id, last)
+                       amdxdna_bo_unpin(heap);
+       }
+
+       return ret;
+}
+
 int amdxdna_gem_pin(struct amdxdna_gem_obj *abo)
 {
        int ret;
@@ -1008,14 +1124,18 @@ int amdxdna_gem_pin(struct amdxdna_gem_obj *abo)
 
 void amdxdna_gem_unpin(struct amdxdna_gem_obj *abo)
 {
-       if (abo->type == AMDXDNA_BO_DEV)
-               abo = abo->client->dev_heap;
+       mutex_lock(&abo->lock);
+       if (abo->type == AMDXDNA_BO_DEV) {
+               struct amdxdna_gem_obj *heap;
+               unsigned long heap_id;
 
-       if (is_import_bo(abo))
-               return;
+               xa_for_each_range(&abo->client->dev_heap_xa, heap_id, heap,
+                                 abo->heap_start_id, abo->heap_end_id)
+                       amdxdna_bo_unpin(heap);
+       } else {
+               amdxdna_bo_unpin(abo);
+       }
 
-       mutex_lock(&abo->lock);
-       drm_gem_shmem_unpin(&abo->base);
        mutex_unlock(&abo->lock);
 }
 
@@ -1072,6 +1192,29 @@ int amdxdna_drm_get_bo_info_ioctl(struct drm_device 
*dev, void *data, struct drm
        return ret;
 }
 
+static int amdxdna_flush_bo(struct amdxdna_gem_obj *abo, u64 offset, u64 size)
+{
+       u64 end;
+
+       if (offset >= abo->mem.size)
+               return -EINVAL;
+
+       if (check_add_overflow(offset, size, &end))
+               return -EINVAL;
+
+       size = min(abo->mem.size, end) - offset;
+       if (is_import_bo(abo))
+               drm_clflush_sg(abo->base.sgt);
+       else if (amdxdna_gem_vmap(abo))
+               drm_clflush_virt_range(amdxdna_gem_vmap(abo) + offset, size);
+       else if (abo->base.pages)
+               drm_clflush_pages(abo->base.pages, abo->mem.size >> PAGE_SHIFT);
+       else
+               return -EINVAL;
+
+       return 0;
+}
+
 /*
  * The sync bo ioctl is to make sure the CPU cache is in sync with memory.
  * This is required because NPU is not cache coherent device. CPU cache
@@ -1082,11 +1225,12 @@ int amdxdna_drm_get_bo_info_ioctl(struct drm_device 
*dev, void *data, struct drm
 int amdxdna_drm_sync_bo_ioctl(struct drm_device *dev,
                              void *data, struct drm_file *filp)
 {
+       struct amdxdna_client *client = filp->driver_priv;
        struct amdxdna_dev *xdna = to_xdna_dev(dev);
        struct amdxdna_drm_sync_bo *args = data;
        struct amdxdna_gem_obj *abo;
        struct drm_gem_object *gobj;
-       int ret;
+       int ret = 0;
 
        gobj = drm_gem_object_lookup(filp, args->handle);
        if (!gobj) {
@@ -1095,22 +1239,34 @@ int amdxdna_drm_sync_bo_ioctl(struct drm_device *dev,
        }
        abo = to_xdna_obj(gobj);
 
-       ret = amdxdna_gem_pin(abo);
-       if (ret) {
-               XDNA_ERR(xdna, "Pin BO %d failed, ret %d", args->handle, ret);
-               goto put_obj;
-       }
+       if (abo->type == AMDXDNA_BO_DEV) {
+               struct amdxdna_gem_obj *heap;
+               unsigned long heap_id;
+
+               xa_for_each_range(&client->dev_heap_xa, heap_id, heap,
+                                 abo->heap_start_id, abo->heap_end_id) {
+                       ret = amdxdna_flush_bo(heap, 0, heap->mem.size);
+                       if (ret) {
+                               XDNA_ERR(xdna, "Failed to flush heap %ld ret 
%d",
+                                        heap_id, ret);
+                               goto put_obj;
+                       }
+               }
+       } else {
+               ret = amdxdna_gem_pin(abo);
+               if (ret) {
+                       XDNA_ERR(xdna, "Pin BO %d failed, ret %d", 
args->handle, ret);
+                       goto put_obj;
+               }
 
-       if (is_import_bo(abo))
-               drm_clflush_sg(abo->base.sgt);
-       else if (amdxdna_gem_vmap(abo))
-               drm_clflush_virt_range(amdxdna_gem_vmap(abo) + args->offset, 
args->size);
-       else if (abo->base.pages)
-               drm_clflush_pages(abo->base.pages, gobj->size >> PAGE_SHIFT);
-       else
-               drm_WARN(&xdna->ddev, 1, "Can not get flush memory");
+               ret = amdxdna_flush_bo(abo, args->offset, args->size);
+               amdxdna_gem_unpin(abo);
 
-       amdxdna_gem_unpin(abo);
+               if (ret) {
+                       drm_WARN(&xdna->ddev, 1, "Can not get flush memory");
+                       goto put_obj;
+               }
+       }
 
        XDNA_DBG(xdna, "Sync bo %d offset 0x%llx, size 0x%llx\n",
                 args->handle, args->offset, args->size);
diff --git a/drivers/accel/amdxdna/amdxdna_gem.h 
b/drivers/accel/amdxdna/amdxdna_gem.h
index 4fc48a1189d2..6a6df51969e0 100644
--- a/drivers/accel/amdxdna/amdxdna_gem.h
+++ b/drivers/accel/amdxdna/amdxdna_gem.h
@@ -46,8 +46,10 @@ struct amdxdna_gem_obj {
        int                             open_ref;
 
        /* Below members are initialized when needed */
-       struct drm_mm                   mm; /* For AMDXDNA_BO_DEV_HEAP */
        struct drm_mm_node              mm_node; /* For AMDXDNA_BO_DEV */
+       u32                             heap_start_id;
+       u32                             heap_end_id;
+       u64                             dev_addr; /* For heap bo */
        u32                             assigned_hwctx;
        struct dma_buf                  *dma_buf;
        struct dma_buf_attachment       *attach;
@@ -71,13 +73,21 @@ static inline void amdxdna_gem_put_obj(struct amdxdna_gem_obj *abo)
        drm_gem_object_put(to_gobj(abo));
 }
 
+/*
+ * Return the user virtual address recorded for the BO (abo->mem.uva).
+ * The device can use this address to access the BO when PASID is enabled.
+ */
+static inline u64 amdxdna_gem_uva(struct amdxdna_gem_obj *abo)
+{
+       return abo->mem.uva;
+}
+
 void *amdxdna_gem_vmap(struct amdxdna_gem_obj *abo);
-u64 amdxdna_gem_uva(struct amdxdna_gem_obj *abo);
 u64 amdxdna_gem_dev_addr(struct amdxdna_gem_obj *abo);
 
 static inline u64 amdxdna_dev_bo_offset(struct amdxdna_gem_obj *abo)
 {
-       return amdxdna_gem_dev_addr(abo) - amdxdna_gem_dev_addr(abo->client->dev_heap);
+       return amdxdna_gem_dev_addr(abo) - abo->client->xdna->dev_info->dev_mem_base;
 }
 
 static inline u64 amdxdna_obj_dma_addr(struct amdxdna_gem_obj *abo)
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
index a6e9be7960c2..c677293c1ae7 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
@@ -126,6 +126,9 @@ static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp)
        mmgrab(client->mm);
        init_srcu_struct(&client->hwctx_srcu);
        xa_init_flags(&client->hwctx_xa, XA_FLAGS_ALLOC);
+       xa_init_flags(&client->dev_heap_xa, XA_FLAGS_ALLOC);
+       drm_mm_init(&client->dev_heap_mm, xdna->dev_info->dev_mem_base,
+                   xdna->dev_info->dev_heap_max_size);
        mutex_init(&client->mm_lock);
 
        mutex_lock(&xdna->dev_lock);
@@ -141,13 +144,18 @@ static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp)
 
 static void amdxdna_client_cleanup(struct amdxdna_client *client)
 {
+       struct amdxdna_gem_obj *heap;
+       unsigned long heap_id;
+
        list_del(&client->node);
        amdxdna_hwctx_remove_all(client);
        xa_destroy(&client->hwctx_xa);
        cleanup_srcu_struct(&client->hwctx_srcu);
 
-       if (client->dev_heap)
-               drm_gem_object_put(to_gobj(client->dev_heap));
+       xa_for_each(&client->dev_heap_xa, heap_id, heap)
+               drm_gem_object_put(to_gobj(heap));
+       xa_destroy(&client->dev_heap_xa);
+       drm_mm_takedown(&client->dev_heap_mm);
 
        mutex_destroy(&client->mm_lock);
        mmdrop(client->mm);
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h
index 471b72299aee..34271c14d359 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
@@ -7,6 +7,7 @@
 #define _AMDXDNA_PCI_DRV_H_
 
 #include <drm/amdxdna_accel.h>
+#include <drm/drm_mm.h>
 #include <drm/drm_print.h>
 #include <linux/iommu.h>
 #include <linux/iova.h>
@@ -61,6 +62,7 @@ struct amdxdna_dev_ops {
        void (*hwctx_fini)(struct amdxdna_hwctx *hwctx);
        int (*hwctx_config)(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size);
        int (*hwctx_sync_debug_bo)(struct amdxdna_hwctx *hwctx, u32 debug_bo_hdl);
+       int (*hwctx_heap_expand)(struct amdxdna_hwctx *hwctx, struct amdxdna_gem_obj *heap);
        void (*hmm_invalidate)(struct amdxdna_gem_obj *abo, unsigned long cur_seq);
        int (*cmd_submit)(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq);
        int (*cmd_wait)(struct amdxdna_hwctx *hwctx, u64 seq, u32 timeout);
@@ -95,6 +97,7 @@ struct amdxdna_dev_info {
        size_t                          dev_mem_size;
        const char                      *default_vbnv;
        const struct amdxdna_rev_vbnv   *rev_vbnv_tbl;
+       size_t                          dev_heap_max_size;
        const struct amdxdna_dev_priv   *dev_priv;
        const struct amdxdna_fw_feature_tbl *fw_feature_tbl;
        const struct amdxdna_dev_ops    *ops;
@@ -153,7 +156,10 @@ struct amdxdna_client {
        struct drm_file                 *filp;
 
        struct mutex                    mm_lock; /* protect memory related */
-       struct amdxdna_gem_obj          *dev_heap;
+       struct xarray                   dev_heap_xa;
+       struct drm_mm                   dev_heap_mm;
+       u32                             dev_heap_nid;
+       size_t                          total_heap_size;
 
        struct iommu_sva                *sva;
        int                             pasid;
diff --git a/drivers/accel/amdxdna/npu1_regs.c b/drivers/accel/amdxdna/npu1_regs.c
index 4e48c030a69f..ca779674017a 100644
--- a/drivers/accel/amdxdna/npu1_regs.c
+++ b/drivers/accel/amdxdna/npu1_regs.c
@@ -139,6 +139,7 @@ const struct amdxdna_dev_info dev_npu1_info = {
        .dev_mem_base      = AIE2_DEVM_BASE,
        .dev_mem_size      = AIE2_DEVM_SIZE,
        .default_vbnv      = "RyzenAI-npu1",
+       .dev_heap_max_size = AIE2_DEVM_SIZE,
        .device_type       = AMDXDNA_DEV_TYPE_KMQ,
        .dev_priv          = &npu1_dev_priv,
        .fw_feature_tbl    = npu1_fw_feature_table,
diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c
index eddc31803a50..15a161384625 100644
--- a/drivers/accel/amdxdna/npu4_regs.c
+++ b/drivers/accel/amdxdna/npu4_regs.c
@@ -98,6 +98,7 @@ const struct amdxdna_fw_feature_tbl npu4_fw_feature_table[] = {
        { .features = BIT_U64(AIE2_NPU_COMMAND), .major = 6, .min_minor = 15 },
        { .features = BIT_U64(AIE2_UPDATE_PROPERTY), .major = 6, .min_minor = 15 },
        { .features = BIT_U64(AIE2_APP_HEALTH), .major = 6, .min_minor = 18 },
+       { .features = BIT_U64(AIE2_ADD_HOST_BUFFER), .major = 6, .min_minor = 18 },
        { .features = BIT_U64(AIE2_GET_DEV_REVISION), .major = 6, .min_minor = 24 },
        { .features = AIE2_ALL_FEATURES, .major = 7 },
        { 0 }
@@ -200,6 +201,7 @@ const struct amdxdna_dev_info dev_npu4_info = {
        .dev_mem_base      = AIE2_DEVM_BASE,
        .dev_mem_size      = AIE2_DEVM_SIZE,
        .default_vbnv      = "RyzenAI-npu4",
+       .dev_heap_max_size = AIE2_DEVM_MAX_SIZE,
        .device_type       = AMDXDNA_DEV_TYPE_KMQ,
        .rev_vbnv_tbl      = npu4_rev_vbnv_tbl,
        .dev_priv          = &npu4_dev_priv,
diff --git a/drivers/accel/amdxdna/npu5_regs.c b/drivers/accel/amdxdna/npu5_regs.c
index a9102978e4a8..306b359d0cd3 100644
--- a/drivers/accel/amdxdna/npu5_regs.c
+++ b/drivers/accel/amdxdna/npu5_regs.c
@@ -107,6 +107,7 @@ const struct amdxdna_dev_info dev_npu5_info = {
        .dev_mem_base      = AIE2_DEVM_BASE,
        .dev_mem_size      = AIE2_DEVM_SIZE,
        .default_vbnv      = "RyzenAI-npu5",
+       .dev_heap_max_size = AIE2_DEVM_MAX_SIZE,
        .device_type       = AMDXDNA_DEV_TYPE_KMQ,
        .rev_vbnv_tbl      = npu4_rev_vbnv_tbl,
        .dev_priv          = &npu5_dev_priv,
diff --git a/drivers/accel/amdxdna/npu6_regs.c b/drivers/accel/amdxdna/npu6_regs.c
index e0db3a09740b..e68637d2a228 100644
--- a/drivers/accel/amdxdna/npu6_regs.c
+++ b/drivers/accel/amdxdna/npu6_regs.c
@@ -108,6 +108,7 @@ const struct amdxdna_dev_info dev_npu6_info = {
        .dev_mem_base      = AIE2_DEVM_BASE,
        .dev_mem_size      = AIE2_DEVM_SIZE,
        .default_vbnv      = "RyzenAI-npu6",
+       .dev_heap_max_size = AIE2_DEVM_MAX_SIZE,
        .device_type       = AMDXDNA_DEV_TYPE_KMQ,
        .rev_vbnv_tbl      = npu4_rev_vbnv_tbl,
        .dev_priv          = &npu6_dev_priv,
-- 
2.34.1

Reply via email to