On 12/4/25 23:43, Philip Yang wrote:
> On 2025-12-03 11:06, Christian König wrote:
>> On 12/1/25 15:28, Philip Yang wrote:
>>> An MQD BO placed in VRAM is accessed through the FB aperture with
>>> mtype UC (uncached); map it through GART with mtype RW (cached)
>>> instead to reduce queue switch latency.
>>>
>>> A TTM BO only has one resource; add a GART resource to amdgpu_bo in
>>> order to support a dynamic GART resource alongside the BO's VRAM
>>> resource.
>>>
>>> Update amdgpu_ttm_gart_bind_gfx9_mqd to map the MQD from either
>>> system memory or VRAM.
>>>
>>> Add helper amdgpu_ttm_alloc_gart_vram_bo to allocate a GART resource
>>> for the MQD bo->gart_res and bind it into the GART mapping.
>> Clear NAK to that approach! That would completely confuse TTM.
>>
>> We need to talk about that on the weekly meeting first.
> I think that is because ttm_bo_mem_space also adds a GART resource to
> the ttm bo, which already holds a VRAM resource. Felix suggested
> allocating GART space via drm_mm and storing it in the mqd structure,
> not in amdgpu_bo.
> I will implement that in the next version and we can discuss the
> details in the meeting.

Yes, that sounds like a good approach to me.
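
For reference, a minimal sketch of what that could look like, assuming a
hypothetical per-device drm_mm covering the GART page slots and a
hypothetical mqd_gart bookkeeping struct (neither exists in amdgpu
today):

#include <drm/drm_mm.h>

/* hypothetical per-MQD GART bookkeeping, kept outside of amdgpu_bo */
struct mqd_gart {
	struct drm_mm_node node;	/* GART page range for this MQD */
	u64 gpu_addr;			/* GART address of the mapping */
};

static int mqd_alloc_gart_space(struct drm_mm *gart_mm,
				struct mqd_gart *mg, u64 num_pages)
{
	/* reserve num_pages GART page slots from the hypothetical mm */
	int r = drm_mm_insert_node(gart_mm, &mg->node, num_pages);

	if (r)
		return r;

	mg->gpu_addr = mg->node.start << PAGE_SHIFT;
	/* the caller would then write the PTEs, e.g. via amdgpu_gart_bind() */
	return 0;
}

static void mqd_free_gart_space(struct mqd_gart *mg)
{
	/* return the page slots; the PTEs would be cleared separately */
	drm_mm_remove_node(&mg->node);
}

That way the extra GART range never shows up as a TTM resource, so TTM
keeps seeing exactly one resource per BO.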

We could implement GART as a separate TTM domain and then have the
ability to map anything into it, but that is a much wider change.

Probably good to do that in the long term, but also tricky to get right.
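
If we ever go that route, a first cut could be as simple as registering
another range manager; rough sketch below, where AMDGPU_PL_GART is an
invented placement index and all of the hard parts (move, eviction and
placement handling for the new domain) are left out:

#include <drm/ttm/ttm_placement.h>
#include <drm/ttm/ttm_range_manager.h>

#define AMDGPU_PL_GART	(TTM_PL_PRIV + 4)	/* hypothetical */

static int gart_domain_init(struct ttm_device *bdev, u64 gart_size)
{
	/* simple range manager over the GART page slots, no ttm_tt backing */
	return ttm_range_man_init(bdev, AMDGPU_PL_GART, false,
				  gart_size >> PAGE_SHIFT);
}

The manager itself is the trivial part; teaching every move/validate
path about the new domain is where it gets tricky.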

Regards,
Christian.

> 
> Regards,
> Philip
>>
>> Regards,
>> Christian.
>>
>>> Signed-off-by: Philip Yang <[email protected]>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    |   3 +
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.h    |   1 +
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       | 113 +++++++++++++++---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |   1 +
>>>   .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c   |   8 ++
>>>   5 files changed, 108 insertions(+), 18 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>> index 926a3f09a776..d267456cd181 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>> @@ -1297,6 +1297,9 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
>>>
>>>       abo = ttm_to_amdgpu_bo(bo);
>>>
>>> +    if (abo->gart_res)
>>> +        ttm_resource_free(bo, &abo->gart_res);
>>> +
>>>       WARN_ON(abo->vm_bo);
>>>
>>>       if (abo->kfd_bo)
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>> index 52c2d1731aab..a412f5ec2a09 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>> @@ -106,6 +106,7 @@ struct amdgpu_bo {
>>>       struct ttm_place        placements[AMDGPU_BO_MAX_PLACEMENTS];
>>>       struct ttm_placement        placement;
>>>       struct ttm_buffer_object    tbo;
>>> +    struct ttm_resource        *gart_res;
>>>       struct ttm_bo_kmap_obj        kmap;
>>>       u64                flags;
>>>       /* per VM structure for page tables and with virtual addresses */
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> index 5f58cff2c28b..1d8f5fc66acc 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> @@ -832,14 +832,27 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev,
>>>    * Ctrl stack and modify their memory type to NC.
>>>    */
>>>   static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev,
>>> -                struct ttm_tt *ttm, uint64_t flags)
>>> +                struct ttm_buffer_object *tbo,
>>> +                uint64_t flags)
>>>   {
>>> +    struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo);
>>> +    struct ttm_tt *ttm = tbo->ttm;
>>>       struct amdgpu_ttm_tt *gtt = (void *)ttm;
>>> -    uint64_t total_pages = ttm->num_pages;
>>> +    uint64_t total_pages;
>>>       int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp);
>>>       uint64_t page_idx, pages_per_xcc;
>>> -    int i;
>>>       uint64_t ctrl_flags = AMDGPU_PTE_MTYPE_VG10(flags, AMDGPU_MTYPE_NC);
>>> +    int i;
>>> +
>>> +    if (!ttm && !abo->gart_res)
>>> +        return;
>>> +
>>> +    if (ttm) {
>>> +        total_pages = ttm->num_pages;
>>> +    } else {
>>> +        WARN_ON_ONCE(abo->gart_res->size != tbo->resource->size);
>>> +        total_pages = (abo->gart_res->size) >> PAGE_SHIFT;
>>> +    }
>>>
>>>       flags = AMDGPU_PTE_MTYPE_VG10(flags, AMDGPU_MTYPE_RW);
>>> @@ -847,19 +860,33 @@ static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev,
>>>       do_div(pages_per_xcc, num_xcc);
>>>
>>>       for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += pages_per_xcc) {
>>> -        /* MQD page: use default flags */
>>> -        amdgpu_gart_bind(adev,
>>> -                gtt->offset + (page_idx << PAGE_SHIFT),
>>> -                1, &gtt->ttm.dma_address[page_idx], flags);
>>> -        /*
>>> -         * Ctrl pages - modify the memory type to NC (ctrl_flags) from
>>> -         * the second page of the BO onward.
>>> -         */
>>> -        amdgpu_gart_bind(adev,
>>> -                gtt->offset + ((page_idx + 1) << PAGE_SHIFT),
>>> -                pages_per_xcc - 1,
>>> -                &gtt->ttm.dma_address[page_idx + 1],
>>> -                ctrl_flags);
>>> +        if (ttm) {
>>> +            /* MQD page: use default flags */
>>> +            amdgpu_gart_bind(adev,
>>> +                    gtt->offset + (page_idx << PAGE_SHIFT),
>>> +                    1, &gtt->ttm.dma_address[page_idx], flags);
>>> +            /*
>>> +             * Ctrl pages - modify the memory type to NC (ctrl_flags) from
>>> +             * the second page of the BO onward.
>>> +             */
>>> +            amdgpu_gart_bind(adev,
>>> +                    gtt->offset + ((page_idx + 1) << PAGE_SHIFT),
>>> +                    pages_per_xcc - 1,
>>> +                    &gtt->ttm.dma_address[page_idx + 1],
>>> +                    ctrl_flags);
>>> +        } else {
>>> +            u64 pa = (tbo->resource->start + page_idx) << PAGE_SHIFT;
>>> +            u64 start_page = abo->gart_res->start + page_idx;
>>> +
>>> +            pa += adev->vm_manager.vram_base_offset;
>>> +            amdgpu_gart_map_vram_range(adev, pa, start_page, 1,
>>> +                           flags, NULL);
>>> +
>>> +            amdgpu_gart_map_vram_range(adev, pa + PAGE_SIZE,
>>> +                           start_page + 1,
>>> +                           pages_per_xcc - 1,
>>> +                           ctrl_flags, NULL);
>>> +        }
>>>       }
>>>   }
>>> @@ -875,12 +902,14 @@ static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
>>>           flags |= AMDGPU_PTE_TMZ;
>>>
>>>       if (abo->flags & AMDGPU_GEM_CREATE_CP_MQD_GFX9) {
>>> -        amdgpu_ttm_gart_bind_gfx9_mqd(adev, ttm, flags);
>>> +        amdgpu_ttm_gart_bind_gfx9_mqd(adev, tbo, flags);
>>>       } else {
>>>           amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
>>>                    gtt->ttm.dma_address, flags);
>>>       }
>>> -    gtt->bound = true;
>>> +
>>> +    if (ttm)
>>> +        gtt->bound = true;
>>>   }
>>>     /*
>>> @@ -1000,6 +1029,54 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
>>>       return 0;
>>>   }
>>>
>>> +int amdgpu_ttm_alloc_gart_vram_bo(struct amdgpu_bo *abo,
>>> +                  u64 *gpu_addr)
>>> +{
>>> +    struct ttm_buffer_object *bo = &abo->tbo;
>>> +    struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
>>> +    struct ttm_operation_ctx ctx = { false, false };
>>> +    struct ttm_placement placement;
>>> +    struct ttm_place placements;
>>> +    struct ttm_resource *res;
>>> +    uint64_t flags;
>>> +    int r;
>>> +
>>> +    /* Only for valid VRAM bo resource */
>>> +    if (bo->resource->start == AMDGPU_BO_INVALID_OFFSET)
>>> +        return 0;
>>> +
>>> +    r = amdgpu_bo_reserve(abo, false);
>>> +    if (unlikely(r))
>>> +        return r;
>>> +
>>> +    /* allocate GART space */
>>> +    placement.num_placement = 1;
>>> +    placement.placement = &placements;
>>> +    placements.fpfn = 0;
>>> +    placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
>>> +    placements.mem_type = TTM_PL_TT;
>>> +    placements.flags = bo->resource->placement;
>>> +
>>> +    r = ttm_bo_mem_space(bo, &placement, &res, &ctx);
>>> +    if (unlikely(r))
>>> +        goto out_unreserve;
>>> +
>>> +    /* compute PTE flags for this buffer object */
>>> +    flags = amdgpu_ttm_tt_pte_flags(adev, NULL, bo->resource);
>>> +
>>> +    /* Bind VRAM pages */
>>> +    abo->gart_res = res;
>>> +
>>> +    amdgpu_ttm_gart_bind(adev, bo, flags);
>>> +    amdgpu_gart_invalidate_tlb(adev);
>>> +
>>> +    *gpu_addr = res->start << PAGE_SHIFT;
>>> +
>>> +out_unreserve:
>>> +    amdgpu_bo_unreserve(abo);
>>> +    return r;
>>> +}
>>> +
>>>   /*
>>>    * amdgpu_ttm_recover_gart - Rebind GTT pages
>>>    *
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>> index 15e659575087..707654732759 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>> @@ -179,6 +179,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>>>               u64 k_job_id);
>>>     int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
>>> +int amdgpu_ttm_alloc_gart_vram_bo(struct amdgpu_bo *abo, u64 *gpu_addr);
>>>   void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
>>>   uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type);
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
>>> index c6945c842267..d96de02c6bb9 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
>>> @@ -148,6 +148,14 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_node *node,
>>>               kfree(mqd_mem_obj);
>>>               return NULL;
>>>           }
>>> +
>>> +        retval = amdgpu_ttm_alloc_gart_vram_bo(mqd_mem_obj->gtt_mem,
>>> +                               &(mqd_mem_obj->gpu_addr));
>>> +        if (retval) {
>>> +            amdgpu_amdkfd_free_gtt_mem(node->adev, &(mqd_mem_obj->gtt_mem));
>>> +            kfree(mqd_mem_obj);
>>> +            return NULL;
>>> +        }
>>>       } else {
>>>           retval = kfd_gtt_sa_allocate(node, sizeof(struct v9_mqd),
>>>                   &mqd_mem_obj);
> 
