On 12/1/25 15:28, Philip Yang wrote:
> An MQD BO in VRAM is accessed via the FB aperture with mtype UC
> (uncached); map it through GART with mtype RW (cached) instead, to
> reduce queue switch latency.
>
> A TTM BO only has one resource, so add a GART resource to amdgpu_bo in
> order to support a dynamic GART resource alongside the VRAM BO
> resource.
>
> Update amdgpu_ttm_gart_bind_gfx9_mqd to map the MQD in either system
> memory or VRAM.
>
> Add the helper amdgpu_ttm_alloc_gart_vram_bo to allocate a GART
> entries resource for the MQD bo->gart_res and bind it to the GART
> mapping.
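For reference, the mapping change being proposed comes down to the MTYPE
encoded into the GART PTEs. A rough sketch with the macros as used in the
patch below; the base 'flags' value is assumed to come from
amdgpu_ttm_tt_pte_flags(), and the ordering follows the hunk in
amdgpu_ttm_gart_bind_gfx9_mqd:

	/* Ctrl stack pages: non-coherent */
	uint64_t ctrl_flags = AMDGPU_PTE_MTYPE_VG10(flags, AMDGPU_MTYPE_NC);

	/* MQD page: cached read/write, instead of UC through the FB aperture */
	flags = AMDGPU_PTE_MTYPE_VG10(flags, AMDGPU_MTYPE_RW);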
Clear NAK to that approach! That would completely confuse TTM.

We need to talk about that at the weekly meeting first.

Regards,
Christian.

>
> Signed-off-by: Philip Yang <[email protected]>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c  |   3 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h  |   1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c     | 113 +++++++++++++++---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h     |   1 +
>  .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c |   8 ++
>  5 files changed, 108 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 926a3f09a776..d267456cd181 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -1297,6 +1297,9 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
>
>  	abo = ttm_to_amdgpu_bo(bo);
>
> +	if (abo->gart_res)
> +		ttm_resource_free(bo, &abo->gart_res);
> +
>  	WARN_ON(abo->vm_bo);
>
>  	if (abo->kfd_bo)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> index 52c2d1731aab..a412f5ec2a09 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> @@ -106,6 +106,7 @@ struct amdgpu_bo {
>  	struct ttm_place		placements[AMDGPU_BO_MAX_PLACEMENTS];
>  	struct ttm_placement		placement;
>  	struct ttm_buffer_object	tbo;
> +	struct ttm_resource		*gart_res;
>  	struct ttm_bo_kmap_obj		kmap;
>  	u64				flags;
>  	/* per VM structure for page tables and with virtual addresses */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 5f58cff2c28b..1d8f5fc66acc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -832,14 +832,27 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev,
>   * Ctrl stack and modify their memory type to NC.
>   */
>  static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev,
> -					   struct ttm_tt *ttm, uint64_t flags)
> +					   struct ttm_buffer_object *tbo,
> +					   uint64_t flags)
>  {
> +	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo);
> +	struct ttm_tt *ttm = tbo->ttm;
>  	struct amdgpu_ttm_tt *gtt = (void *)ttm;
> -	uint64_t total_pages = ttm->num_pages;
> +	uint64_t total_pages;
>  	int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp);
>  	uint64_t page_idx, pages_per_xcc;
> -	int i;
>  	uint64_t ctrl_flags = AMDGPU_PTE_MTYPE_VG10(flags, AMDGPU_MTYPE_NC);
> +	int i;
> +
> +	if (!ttm && !abo->gart_res)
> +		return;
> +
> +	if (ttm) {
> +		total_pages = ttm->num_pages;
> +	} else {
> +		WARN_ON_ONCE(abo->gart_res->size != tbo->resource->size);
> +		total_pages = (abo->gart_res->size) >> PAGE_SHIFT;
> +	}
>
>  	flags = AMDGPU_PTE_MTYPE_VG10(flags, AMDGPU_MTYPE_RW);
>
> @@ -847,19 +860,33 @@ static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev,
>  	do_div(pages_per_xcc, num_xcc);
>
>  	for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += pages_per_xcc) {
> -		/* MQD page: use default flags */
> -		amdgpu_gart_bind(adev,
> -				 gtt->offset + (page_idx << PAGE_SHIFT),
> -				 1, &gtt->ttm.dma_address[page_idx], flags);
> -		/*
> -		 * Ctrl pages - modify the memory type to NC (ctrl_flags) from
> -		 * the second page of the BO onward.
> -		 */
> -		amdgpu_gart_bind(adev,
> -				 gtt->offset + ((page_idx + 1) << PAGE_SHIFT),
> -				 pages_per_xcc - 1,
> -				 &gtt->ttm.dma_address[page_idx + 1],
> -				 ctrl_flags);
> +		if (ttm) {
> +			/* MQD page: use default flags */
> +			amdgpu_gart_bind(adev,
> +					 gtt->offset + (page_idx << PAGE_SHIFT),
> +					 1, &gtt->ttm.dma_address[page_idx],
> +					 flags);
> +			/*
> +			 * Ctrl pages - modify the memory type to NC
> +			 * (ctrl_flags) from the second page of the BO onward.
> +			 */
> +			amdgpu_gart_bind(adev,
> +					 gtt->offset + ((page_idx + 1) << PAGE_SHIFT),
> +					 pages_per_xcc - 1,
> +					 &gtt->ttm.dma_address[page_idx + 1],
> +					 ctrl_flags);
> +		} else {
> +			u64 pa = (tbo->resource->start + page_idx) << PAGE_SHIFT;
> +			u64 start_page = abo->gart_res->start + page_idx;
> +
> +			pa += adev->vm_manager.vram_base_offset;
> +			amdgpu_gart_map_vram_range(adev, pa, start_page, 1,
> +						   flags, NULL);
> +
> +			amdgpu_gart_map_vram_range(adev, pa + PAGE_SIZE,
> +						   start_page + 1,
> +						   pages_per_xcc - 1,
> +						   ctrl_flags, NULL);
> +		}
>  	}
>  }
>
> @@ -875,12 +902,14 @@ static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
>  		flags |= AMDGPU_PTE_TMZ;
>
>  	if (abo->flags & AMDGPU_GEM_CREATE_CP_MQD_GFX9) {
> -		amdgpu_ttm_gart_bind_gfx9_mqd(adev, ttm, flags);
> +		amdgpu_ttm_gart_bind_gfx9_mqd(adev, tbo, flags);
>  	} else {
>  		amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
>  				 gtt->ttm.dma_address, flags);
>  	}
> -	gtt->bound = true;
> +
> +	if (ttm)
> +		gtt->bound = true;
>  }
>
>  /*
> @@ -1000,6 +1029,54 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
>  	return 0;
>  }
>
> +int amdgpu_ttm_alloc_gart_vram_bo(struct amdgpu_bo *abo,
> +				  u64 *gpu_addr)
> +{
> +	struct ttm_buffer_object *bo = &abo->tbo;
> +	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
> +	struct ttm_operation_ctx ctx = { false, false };
> +	struct ttm_placement placement;
> +	struct ttm_place placements;
> +	struct ttm_resource *res;
> +	uint64_t flags;
> +	int r;
> +
> +	/* Only for valid VRAM bo resource */
> +	if (bo->resource->start == AMDGPU_BO_INVALID_OFFSET)
> +		return 0;
> +
> +	r = amdgpu_bo_reserve(abo, false);
> +	if (unlikely(r))
> +		return r;
> +
> +	/* allocate GART space */
> +	placement.num_placement = 1;
> +	placement.placement = &placements;
> +	placements.fpfn = 0;
> +	placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
> +	placements.mem_type = TTM_PL_TT;
> +	placements.flags = bo->resource->placement;
> +
> +	r = ttm_bo_mem_space(bo, &placement, &res, &ctx);
> +	if (unlikely(r))
> +		goto out_unreserve;
> +
> +	/* compute PTE flags for this buffer object */
> +	flags = amdgpu_ttm_tt_pte_flags(adev, NULL, bo->resource);
> +
> +	/* Bind VRAM pages */
> +	abo->gart_res = res;
> +
> +	amdgpu_ttm_gart_bind(adev, bo, flags);
> +	amdgpu_gart_invalidate_tlb(adev);
> +
> +	*gpu_addr = res->start << PAGE_SHIFT;
> +
> +out_unreserve:
> +	amdgpu_bo_unreserve(abo);
> +	return r;
> +}
> +
>  /*
>   * amdgpu_ttm_recover_gart - Rebind GTT pages
>   *
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index 15e659575087..707654732759 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -179,6 +179,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>  			u64 k_job_id);
>
>  int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
> +int amdgpu_ttm_alloc_gart_vram_bo(struct amdgpu_bo *abo, u64 *gpu_addr);
>  void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
>  uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type);
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
> index c6945c842267..d96de02c6bb9 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
> @@ -148,6 +148,14 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_node *node,
>  			kfree(mqd_mem_obj);
>  			return NULL;
>  		}
> +
> +		retval = amdgpu_ttm_alloc_gart_vram_bo(mqd_mem_obj->gtt_mem,
> +						       &(mqd_mem_obj->gpu_addr));
> +		if (retval) {
> +			amdgpu_amdkfd_free_gtt_mem(node->adev, &(mqd_mem_obj->gtt_mem));
> +			kfree(mqd_mem_obj);
> +			return NULL;
> +		}
>  	} else {
>  		retval = kfd_gtt_sa_allocate(node, sizeof(struct v9_mqd),
>  					     &mqd_mem_obj);

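For illustration, a minimal sketch of how the new helper is meant to be
called, based on the allocate_mqd() hunk above ('bo' is a hypothetical
stand-in for mqd_mem_obj->gtt_mem, i.e. a VRAM amdgpu_bo created with
AMDGPU_GEM_CREATE_CP_MQD_GFX9; error handling is trimmed):

	u64 gart_addr;
	int r;

	/* Allocates a TTM_PL_TT resource into bo->gart_res, binds the BO's
	 * VRAM pages into GART with the MQD/ctrl-stack mtypes and returns
	 * the GART address of the mapping. */
	r = amdgpu_ttm_alloc_gart_vram_bo(bo, &gart_addr);
	if (r)
		return r;

	/* MQD accesses now go through the cached GART mapping instead of
	 * the uncached FB aperture. */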