On 2025-12-03 11:06, Christian König wrote:
On 12/1/25 15:28, Philip Yang wrote:
An MQD BO in VRAM is accessed via the FB aperture as mtype UC (uncached); map it
through GART as mtype RW (cached) instead, to reduce queue switch latency.

A TTM bo only has one resource; add a GART resource to amdgpu_bo so the BO can
hold both its VRAM resource and a dynamic GART resource.

Update amdgpu_ttm_gart_bind_gfx9_mqd to map the MQD from either system memory or VRAM.

Add helper amdgpu_ttm_alloc_gart_vram_bo to allocate a GART resource for the MQD
into bo->gart_res and bind it into the GART mapping.
Clear NAK to that approach! That would completely confuse TTM.

We need to talk about that on the weekly meeting first.
I think that is because ttm_bo_mem_space also adds a GART resource to the ttm bo, which already holds the VRAM resource. Felix suggested allocating the GART space via drm_mm and storing it in the mqd structure instead of in amdgpu_bo. I will implement that in the next version (rough sketch below) and we can discuss the details in the meeting.
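
For reference, a minimal sketch of that direction, assuming a dedicated drm_mm
range manager for MQD GART space; kfd_mqd_gart_map() and mqd_gart_mm are
placeholder names, AMDGPU_PTE_VALID stands in for the full PTE flags (mtype
etc.), and amdgpu_gart_map_vram_range() is the helper introduced earlier in
this series:

static int kfd_mqd_gart_map(struct amdgpu_device *adev, struct amdgpu_bo *bo,
                            struct drm_mm *mqd_gart_mm, struct drm_mm_node *node,
                            u64 *gpu_addr)
{
        u64 num_pages = amdgpu_bo_size(bo) >> PAGE_SHIFT;
        u64 pa;
        int r;

        /* Reserve a GART address range without creating a second TTM resource */
        r = drm_mm_insert_node_in_range(mqd_gart_mm, node,
                                        num_pages << PAGE_SHIFT, PAGE_SIZE, 0,
                                        0, adev->gmc.gart_size,
                                        DRM_MM_INSERT_BEST);
        if (r)
                return r;

        /* Physical address of the BO in VRAM */
        pa = ((u64)bo->tbo.resource->start << PAGE_SHIFT) +
             adev->vm_manager.vram_base_offset;

        /* Write GART PTEs for the reserved range, then flush the TLB */
        amdgpu_gart_map_vram_range(adev, pa, node->start >> PAGE_SHIFT,
                                   num_pages, AMDGPU_PTE_VALID, NULL);
        amdgpu_gart_invalidate_tlb(adev);

        *gpu_addr = node->start;
        return 0;
}

Teardown would then be drm_mm_remove_node() on the node kept in the mqd
structure plus clearing the PTEs, so amdgpu_bo and the TTM resource stay
untouched.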

Regards,
Philip

Regards,
Christian.

Signed-off-by: Philip Yang <[email protected]>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    |   3 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h    |   1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       | 113 +++++++++++++++---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |   1 +
  .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c   |   8 ++
  5 files changed, 108 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 926a3f09a776..d267456cd181 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1297,6 +1297,9 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)

        abo = ttm_to_amdgpu_bo(bo);

+       if (abo->gart_res)
+               ttm_resource_free(bo, &abo->gart_res);
+
        WARN_ON(abo->vm_bo);

        if (abo->kfd_bo)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 52c2d1731aab..a412f5ec2a09 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -106,6 +106,7 @@ struct amdgpu_bo {
        struct ttm_place                placements[AMDGPU_BO_MAX_PLACEMENTS];
        struct ttm_placement            placement;
        struct ttm_buffer_object        tbo;
+       struct ttm_resource             *gart_res;
        struct ttm_bo_kmap_obj          kmap;
        u64                             flags;
        /* per VM structure for page tables and with virtual addresses */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 5f58cff2c28b..1d8f5fc66acc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -832,14 +832,27 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev,
   * Ctrl stack and modify their memory type to NC.
   */
  static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev,
-                               struct ttm_tt *ttm, uint64_t flags)
+                               struct ttm_buffer_object *tbo,
+                               uint64_t flags)
  {
+       struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo);
+       struct ttm_tt *ttm = tbo->ttm;
        struct amdgpu_ttm_tt *gtt = (void *)ttm;
-       uint64_t total_pages = ttm->num_pages;
+       uint64_t total_pages;
        int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp);
        uint64_t page_idx, pages_per_xcc;
-       int i;
        uint64_t ctrl_flags = AMDGPU_PTE_MTYPE_VG10(flags, AMDGPU_MTYPE_NC);
+       int i;
+
+       if (!ttm && !abo->gart_res)
+               return;
+
+       if (ttm) {
+               total_pages = ttm->num_pages;
+       } else {
+               WARN_ON_ONCE(abo->gart_res->size != tbo->resource->size);
+               total_pages = (abo->gart_res->size) >> PAGE_SHIFT;
+       }
        flags = AMDGPU_PTE_MTYPE_VG10(flags, AMDGPU_MTYPE_RW);

@@ -847,19 +860,33 @@ static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev,
        do_div(pages_per_xcc, num_xcc);
for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += pages_per_xcc) {
-               /* MQD page: use default flags */
-               amdgpu_gart_bind(adev,
-                               gtt->offset + (page_idx << PAGE_SHIFT),
-                               1, &gtt->ttm.dma_address[page_idx], flags);
-               /*
-                * Ctrl pages - modify the memory type to NC (ctrl_flags) from
-                * the second page of the BO onward.
-                */
-               amdgpu_gart_bind(adev,
-                               gtt->offset + ((page_idx + 1) << PAGE_SHIFT),
-                               pages_per_xcc - 1,
-                               &gtt->ttm.dma_address[page_idx + 1],
-                               ctrl_flags);
+               if (ttm) {
+                       /* MQD page: use default flags */
+                       amdgpu_gart_bind(adev,
+                                       gtt->offset + (page_idx << PAGE_SHIFT),
+                                       1, &gtt->ttm.dma_address[page_idx], flags);
+                       /*
+                        * Ctrl pages - modify the memory type to NC (ctrl_flags) from
+                        * the second page of the BO onward.
+                        */
+                       amdgpu_gart_bind(adev,
+                                       gtt->offset + ((page_idx + 1) << PAGE_SHIFT),
+                                       pages_per_xcc - 1,
+                                       &gtt->ttm.dma_address[page_idx + 1],
+                                       ctrl_flags);
+               } else {
+                       u64 pa = (tbo->resource->start + page_idx) << PAGE_SHIFT;
+                       u64 start_page = abo->gart_res->start + page_idx;
+
+                       pa += adev->vm_manager.vram_base_offset;
+                       amdgpu_gart_map_vram_range(adev, pa, start_page, 1,
+                                                  flags, NULL);
+
+                       amdgpu_gart_map_vram_range(adev, pa + PAGE_SIZE,
+                                                  start_page + 1,
+                                                  pages_per_xcc - 1,
+                                                  ctrl_flags, NULL);
+               }
        }
  }
@@ -875,12 +902,14 @@ static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
                flags |= AMDGPU_PTE_TMZ;
if (abo->flags & AMDGPU_GEM_CREATE_CP_MQD_GFX9) {
-               amdgpu_ttm_gart_bind_gfx9_mqd(adev, ttm, flags);
+               amdgpu_ttm_gart_bind_gfx9_mqd(adev, tbo, flags);
        } else {
                amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
                                 gtt->ttm.dma_address, flags);
        }
-       gtt->bound = true;
+
+       if (ttm)
+               gtt->bound = true;
  }
/*
@@ -1000,6 +1029,54 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
        return 0;
  }
+int amdgpu_ttm_alloc_gart_vram_bo(struct amdgpu_bo *abo,
+                                 u64 *gpu_addr)
+{
+       struct ttm_buffer_object *bo = &abo->tbo;
+       struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+       struct ttm_operation_ctx ctx = { false, false };
+       struct ttm_placement placement;
+       struct ttm_place placements;
+       struct ttm_resource *res;
+       uint64_t flags;
+       int r;
+
+       /* Only for valid VRAM bo resource */
+       if (bo->resource->start == AMDGPU_BO_INVALID_OFFSET)
+               return 0;
+
+       r = amdgpu_bo_reserve(abo, false);
+       if (unlikely(r))
+               return r;
+
+       /* allocate GART space */
+       placement.num_placement = 1;
+       placement.placement = &placements;
+       placements.fpfn = 0;
+       placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
+       placements.mem_type = TTM_PL_TT;
+       placements.flags = bo->resource->placement;
+
+       r = ttm_bo_mem_space(bo, &placement, &res, &ctx);
+       if (unlikely(r))
+               goto out_unreserve;
+
+       /* compute PTE flags for this buffer object */
+       flags = amdgpu_ttm_tt_pte_flags(adev, NULL, bo->resource);
+
+       /* Bind VRAM pages */
+       abo->gart_res = res;
+
+       amdgpu_ttm_gart_bind(adev, bo, flags);
+       amdgpu_gart_invalidate_tlb(adev);
+
+       *gpu_addr = res->start << PAGE_SHIFT;
+
+out_unreserve:
+       amdgpu_bo_unreserve(abo);
+       return r;
+}
+
  /*
   * amdgpu_ttm_recover_gart - Rebind GTT pages
   *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 15e659575087..707654732759 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -179,6 +179,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
                        u64 k_job_id);
int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
+int amdgpu_ttm_alloc_gart_vram_bo(struct amdgpu_bo *abo, u64 *gpu_addr);
  void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
  uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index c6945c842267..d96de02c6bb9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -148,6 +148,14 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_node *node,
                        kfree(mqd_mem_obj);
                        return NULL;
                }
+
+               retval = amdgpu_ttm_alloc_gart_vram_bo(mqd_mem_obj->gtt_mem,
+                                                      &(mqd_mem_obj->gpu_addr));
+               if (retval) {
+                       amdgpu_amdkfd_free_gtt_mem(node->adev,
+                                                  &(mqd_mem_obj->gtt_mem));
+                       kfree(mqd_mem_obj);
+                       return NULL;
+               }
        } else {
                retval = kfd_gtt_sa_allocate(node, sizeof(struct v9_mqd),
                                &mqd_mem_obj);
