On 12/5/25 22:49, Philip Yang wrote:
> The MQD BO in VRAM is accessed via the FB aperture with MTYPE UC
> (uncached). Map it to GART with MTYPE RW (cached) instead, to reduce
> queue switch latency.
>
> Add helpers amdgpu_ttm_alloc/free_gart_entries.
> Add helper amdgpu_ttm_gart_bind_gfx9_mqd_vram to bind VRAM pages
> to the GART mapping.
>
> Add a GART drm_mm node to the kfd mem obj so the GART entries can be
> freed after the MQD is freed.
>
> Signed-off-by: Philip Yang <[email protected]>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 103 ++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 8 ++
> drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c | 1 +
> .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 9 ++
> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 +
> 5 files changed, 122 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 4f8bc7f35cdc..fc6f4daa9b87 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -880,6 +880,42 @@ static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev,
> }
> }
>
> +static void amdgpu_ttm_gart_bind_gfx9_mqd_vram(struct amdgpu_device *adev,
> + struct ttm_buffer_object *tbo,
> + struct drm_mm_node *mm_node,
> + uint64_t flags)
> +{
> + uint64_t total_pages;
> + int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp);
> + uint64_t page_idx, pages_per_xcc;
> + uint64_t ctrl_flags = flags;
> + int i;
> +
> + total_pages = tbo->resource->size >> PAGE_SHIFT;
> +
> + amdgpu_gmc_get_vm_pte(adev, NULL, NULL, AMDGPU_VM_MTYPE_NC, &ctrl_flags);
> +
> + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 3))
> + amdgpu_gmc_get_vm_pte(adev, NULL, NULL, AMDGPU_VM_MTYPE_RW, &flags);
> +
> + pages_per_xcc = total_pages;
> + do_div(pages_per_xcc, num_xcc);
> +
> + for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += pages_per_xcc) {
> + u64 pa = (tbo->resource->start + page_idx) << PAGE_SHIFT;
> + u64 start_page = mm_node->start + page_idx;
Don't use resource->start and mm_node->start directly. Use the resource
iterators for that.
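
Roughly along these lines (a rough, untested sketch): walk the VRAM
resource with the amdgpu_res_cursor helpers instead of assuming it is
contiguous. It reuses the amdgpu_gart_map_vram_range() helper from the
patch above, keeps mm_node->start only as the start of the GART window,
and drops the per-XCC control-page flag handling for brevity:

	struct amdgpu_res_cursor cursor;
	uint64_t gart_page = mm_node->start;

	amdgpu_res_first(tbo->resource, 0, tbo->resource->size, &cursor);
	while (cursor.remaining) {
		/* physical address and page count of this contiguous VRAM chunk */
		uint64_t pa = cursor.start + adev->vm_manager.vram_base_offset;
		uint64_t npages = cursor.size >> PAGE_SHIFT;

		amdgpu_gart_map_vram_range(adev, pa, gart_page, npages,
					   ctrl_flags, NULL);

		gart_page += npages;
		amdgpu_res_next(&cursor, cursor.size);
	}
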
> +
> + pa += adev->vm_manager.vram_base_offset;
> + amdgpu_gart_map_vram_range(adev, pa, start_page, 1,
> + flags, NULL);
> +
> + amdgpu_gart_map_vram_range(adev, pa + PAGE_SIZE,
> + start_page + 1,
> + pages_per_xcc - 1,
> + ctrl_flags, NULL);
> + }
> +}
> +
> static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
> struct ttm_buffer_object *tbo,
> uint64_t flags)
> @@ -1017,6 +1053,73 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
> return 0;
> }
>
> +int amdgpu_ttm_alloc_gart_entries(struct amdgpu_device *adev,
> + struct drm_mm_node *mm_node,
> + u64 num_pages)
> +{
> + struct ttm_resource_manager *man;
> + struct amdgpu_gtt_mgr *mgr;
> + int r;
> +
> + man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
> + mgr = container_of(man, struct amdgpu_gtt_mgr, manager);
> +
> + spin_lock(&mgr->lock);
> + r = drm_mm_insert_node_in_range(&mgr->mm, mm_node, num_pages,
> + 0, 0, 0,
> + adev->gmc.gart_size >> PAGE_SHIFT,
> + DRM_MM_INSERT_BEST);
That belongs into amdgpu_gtt_mgr.c and clearly not here!
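
E.g. with a small wrapper in amdgpu_gtt_mgr.c along these lines (rough,
untested sketch; the name amdgpu_gtt_mgr_alloc_node() is made up here),
which amdgpu_ttm_alloc_gart_entries() would then simply call:

	int amdgpu_gtt_mgr_alloc_node(struct amdgpu_device *adev,
				      struct drm_mm_node *node, u64 num_pages)
	{
		struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr;
		int r;

		/* keep the drm_mm bookkeeping inside the GTT manager */
		spin_lock(&mgr->lock);
		r = drm_mm_insert_node_in_range(&mgr->mm, node, num_pages,
						0, 0, 0,
						adev->gmc.gart_size >> PAGE_SHIFT,
						DRM_MM_INSERT_BEST);
		spin_unlock(&mgr->lock);
		return r;
	}

The matching free helper would then also live in amdgpu_gtt_mgr.c.
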
Regards,
Christian.
> + spin_unlock(&mgr->lock);
> + return r;
> +}
> +
> +void amdgpu_ttm_free_gart_entries(struct amdgpu_device *adev,
> + struct drm_mm_node *mm_node)
> +{
> + struct ttm_resource_manager *man;
> + struct amdgpu_gtt_mgr *mgr;
> +
> + man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
> + mgr = container_of(man, struct amdgpu_gtt_mgr, manager);
> +
> + spin_lock(&mgr->lock);
> + if (drm_mm_node_allocated(mm_node))
> + drm_mm_remove_node(mm_node);
> + spin_unlock(&mgr->lock);
> +}
> +
> +/*
> + * amdgpu_ttm_alloc_gart_vram_bo - Bind VRAM pages to GART mapping
> + *
> + * call amdgpu_ttm_alloc_gart_entries to alloc GART dynamically
> + */
> +int amdgpu_ttm_alloc_gart_vram_bo(struct amdgpu_bo *abo,
> + struct drm_mm_node *mm_node,
> + u64 *gpu_addr)
> +{
> + struct ttm_buffer_object *bo = &abo->tbo;
> + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
> + uint64_t flags;
> + int r;
> +
> + /* Only for valid VRAM bo resource */
> + if (bo->resource->start == AMDGPU_BO_INVALID_OFFSET)
> + return 0;
> +
> + r = amdgpu_ttm_alloc_gart_entries(adev, mm_node,
> + amdgpu_bo_ngpu_pages(abo));
> + if (r)
> + return r;
> +
> + /* compute PTE flags for this buffer object */
> + flags = amdgpu_ttm_tt_pte_flags(adev, NULL, bo->resource);
> + amdgpu_ttm_gart_bind_gfx9_mqd_vram(adev, bo, mm_node, flags);
> + amdgpu_gart_invalidate_tlb(adev);
> +
> + *gpu_addr = mm_node->start << PAGE_SHIFT;
> + return 0;
> +}
> +
> /*
> * amdgpu_ttm_recover_gart - Rebind GTT pages
> *
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index 72488124aa59..cb6123358843 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -185,6 +185,14 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity,
> u64 k_job_id);
>
> int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
> +int amdgpu_ttm_alloc_gart_vram_bo(struct amdgpu_bo *abo,
> + struct drm_mm_node *mm_node,
> + u64 *gpu_addr);
> +int amdgpu_ttm_alloc_gart_entries(struct amdgpu_device *adev,
> + struct drm_mm_node *mm_node,
> + u64 num_pages);
> +void amdgpu_ttm_free_gart_entries(struct amdgpu_device *adev,
> + struct drm_mm_node *mm_node);
> void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
> uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type);
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
> index f78b249e1a41..00e1e5b30a3a 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
> @@ -225,6 +225,7 @@ void kfd_free_mqd_cp(struct mqd_manager *mm, void *mqd,
> struct kfd_mem_obj *mqd_mem_obj)
> {
> if (mqd_mem_obj->mem) {
> + amdgpu_ttm_free_gart_entries(mm->dev->adev, &mqd_mem_obj->mm_node);
> amdgpu_amdkfd_free_kernel_mem(mm->dev->adev, &mqd_mem_obj->mem);
> kfree(mqd_mem_obj);
> } else {
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
> index 14123e1a9716..5828220056bd 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
> @@ -148,6 +148,15 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_node *node,
> kfree(mqd_mem_obj);
> return NULL;
> }
> +
> + retval = amdgpu_ttm_alloc_gart_vram_bo(mqd_mem_obj->mem,
> + &mqd_mem_obj->mm_node,
> + &(mqd_mem_obj->gpu_addr));
> + if (retval) {
> + amdgpu_amdkfd_free_kernel_mem(node->adev, &(mqd_mem_obj->mem));
> + kfree(mqd_mem_obj);
> + return NULL;
> + }
> } else {
> retval = kfd_gtt_sa_allocate(node, sizeof(struct v9_mqd),
> &mqd_mem_obj);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 29419b3249cf..fdde907836fb 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -252,6 +252,7 @@ struct kfd_mem_obj {
> uint64_t gpu_addr;
> uint32_t *cpu_ptr;
> void *mem;
> + struct drm_mm_node mm_node;
> };
>
> struct kfd_vmid_info {