On 12/4/25 23:43, Philip Yang wrote: > On 2025-12-03 11:06, Christian König wrote: >> On 12/1/25 15:28, Philip Yang wrote: >>> MQD BO on VRAM access via FB aperture is mtype UC uncaching, map to GART >>> as mtype RW caching, to reduce queue switch latency. >>> >>> TTM bo only has one resource, add GART resource to amdgpu_bo in order to >>> support dynamic GART resource and VRAM BO resource. >>> >>> Update amdgpu_ttm_gart_bind_gfx9_mqd to map MQD in system or VRAM. >>> >>> Add helper amdgpu_ttm_alloc_gart_for_vram_bo to alloc GART entries resource >>> for MQD bo->gart_res and bind to GART mapping. >> Clear NAK to that approach! That would completely confuse TTM. >> >> We need to talk about that on the weekly meeting first. > I think that is because ttm_bo_mem_space also adds GART resource into ttm bo, > which already holds > VRAM resource. Felix suggested to alloc gart space via drm_mm and store in mqd > structure, not in amdgpu_bo. > I will implement it in next version and we can discuss details in the meeting.
Yes, that sounds like a good approach to me. We could implement GART as separate TTM domain and then have the ability to map anything to it, but that is a much wider change. Probably good to do that in the long term, but also tricky to get right. Regards, Christian. > > Regards, > Philip >> >> Regards, >> Christian. >> >>> Signed-off-by: Philip Yang <[email protected]> >>> --- >>> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 3 + >>> drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 1 + >>> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 113 +++++++++++++++--- >>> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 1 + >>> .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 8 ++ >>> 5 files changed, 108 insertions(+), 18 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c >>> index 926a3f09a776..d267456cd181 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c >>> @@ -1297,6 +1297,9 @@ void amdgpu_bo_release_notify(struct >>> ttm_buffer_object *bo) >>> abo = ttm_to_amdgpu_bo(bo); >>> + if (abo->gart_res) >>> + ttm_resource_free(bo, &abo->gart_res); >>> + >>> WARN_ON(abo->vm_bo); >>> if (abo->kfd_bo) >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h >>> index 52c2d1731aab..a412f5ec2a09 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h >>> @@ -106,6 +106,7 @@ struct amdgpu_bo { >>> struct ttm_place placements[AMDGPU_BO_MAX_PLACEMENTS]; >>> struct ttm_placement placement; >>> struct ttm_buffer_object tbo; >>> + struct ttm_resource *gart_res; >>> struct ttm_bo_kmap_obj kmap; >>> u64 flags; >>> /* per VM structure for page tables and with virtual addresses */ >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c >>> index 5f58cff2c28b..1d8f5fc66acc 100644 >>> --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c >>> @@ -832,14 +832,27 @@ static void amdgpu_ttm_tt_unpin_userptr(struct >>> ttm_device *bdev, >>> * Ctrl stack and modify their memory type to NC. >>> */ >>> static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev, >>> - struct ttm_tt *ttm, uint64_t flags) >>> + struct ttm_buffer_object *tbo, >>> + uint64_t flags) >>> { >>> + struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo); >>> + struct ttm_tt *ttm = tbo->ttm; >>> struct amdgpu_ttm_tt *gtt = (void *)ttm; >>> - uint64_t total_pages = ttm->num_pages; >>> + uint64_t total_pages; >>> int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp); >>> uint64_t page_idx, pages_per_xcc; >>> - int i; >>> uint64_t ctrl_flags = AMDGPU_PTE_MTYPE_VG10(flags, AMDGPU_MTYPE_NC); >>> + int i; >>> + >>> + if (!ttm && !abo->gart_res) >>> + return; >>> + >>> + if (ttm) { >>> + total_pages = ttm->num_pages; >>> + } else { >>> + WARN_ON_ONCE(abo->gart_res->size != tbo->resource->size); >>> + total_pages = (abo->gart_res->size) >> PAGE_SHIFT; >>> + } >>> flags = AMDGPU_PTE_MTYPE_VG10(flags, AMDGPU_MTYPE_RW); >>> @@ -847,19 +860,33 @@ static void amdgpu_ttm_gart_bind_gfx9_mqd(struct >>> amdgpu_device *adev, >>> do_div(pages_per_xcc, num_xcc); >>> for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += >>> pages_per_xcc) { >>> - /* MQD page: use default flags */ >>> - amdgpu_gart_bind(adev, >>> - gtt->offset + (page_idx << PAGE_SHIFT), >>> - 1, &gtt->ttm.dma_address[page_idx], flags); >>> - /* >>> - * Ctrl pages - modify the memory type to NC (ctrl_flags) from >>> - * the second page of the BO onward. 
>>> - */ >>> - amdgpu_gart_bind(adev, >>> - gtt->offset + ((page_idx + 1) << PAGE_SHIFT), >>> - pages_per_xcc - 1, >>> - &gtt->ttm.dma_address[page_idx + 1], >>> - ctrl_flags); >>> + if (ttm) { >>> + /* MQD page: use default flags */ >>> + amdgpu_gart_bind(adev, >>> + gtt->offset + (page_idx << PAGE_SHIFT), >>> + 1, &gtt->ttm.dma_address[page_idx], flags); >>> + /* >>> + * Ctrl pages - modify the memory type to NC (ctrl_flags) from >>> + * the second page of the BO onward. >>> + */ >>> + amdgpu_gart_bind(adev, >>> + gtt->offset + ((page_idx + 1) << PAGE_SHIFT), >>> + pages_per_xcc - 1, >>> + &gtt->ttm.dma_address[page_idx + 1], >>> + ctrl_flags); >>> + } else { >>> + u64 pa = (tbo->resource->start + page_idx) << PAGE_SHIFT; >>> + u64 start_page = abo->gart_res->start + page_idx; >>> + >>> + pa += adev->vm_manager.vram_base_offset; >>> + amdgpu_gart_map_vram_range(adev, pa, start_page, 1, >>> + flags, NULL); >>> + >>> + amdgpu_gart_map_vram_range(adev, pa + PAGE_SIZE, >>> + start_page + 1, >>> + pages_per_xcc - 1, >>> + ctrl_flags, NULL); >>> + } >>> } >>> } >>> @@ -875,12 +902,14 @@ static void amdgpu_ttm_gart_bind(struct >>> amdgpu_device *adev, >>> flags |= AMDGPU_PTE_TMZ; >>> if (abo->flags & AMDGPU_GEM_CREATE_CP_MQD_GFX9) { >>> - amdgpu_ttm_gart_bind_gfx9_mqd(adev, ttm, flags); >>> + amdgpu_ttm_gart_bind_gfx9_mqd(adev, tbo, flags); >>> } else { >>> amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages, >>> gtt->ttm.dma_address, flags); >>> } >>> - gtt->bound = true; >>> + >>> + if (ttm) >>> + gtt->bound = true; >>> } >>> /* >>> @@ -1000,6 +1029,54 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object >>> *bo) >>> return 0; >>> } >>> +int amdgpu_ttm_alloc_gart_vram_bo(struct amdgpu_bo *abo, >>> + u64 *gpu_addr) >>> +{ >>> + struct ttm_buffer_object *bo = &abo->tbo; >>> + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); >>> + struct ttm_operation_ctx ctx = { false, false }; >>> + struct ttm_placement placement; >>> + struct ttm_place placements; >>> + struct 
ttm_resource *res; >>> + uint64_t flags; >>> + int r; >>> + >>> + /* Only for valid VRAM bo resource */ >>> + if (bo->resource->start == AMDGPU_BO_INVALID_OFFSET) >>> + return 0; >>> + >>> + r = amdgpu_bo_reserve(abo, false); >>> + if (unlikely(r)) >>> + return r; >>> + >>> + /* allocate GART space */ >>> + placement.num_placement = 1; >>> + placement.placement = &placements; >>> + placements.fpfn = 0; >>> + placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT; >>> + placements.mem_type = TTM_PL_TT; >>> + placements.flags = bo->resource->placement; >>> + >>> + r = ttm_bo_mem_space(bo, &placement, &res, &ctx); >>> + if (unlikely(r)) >>> + goto out_unreserve; >>> + >>> + /* compute PTE flags for this buffer object */ >>> + flags = amdgpu_ttm_tt_pte_flags(adev, NULL, bo->resource); >>> + >>> + /* Bind VRAM pages */ >>> + abo->gart_res = res; >>> + >>> + amdgpu_ttm_gart_bind(adev, bo, flags); >>> + amdgpu_gart_invalidate_tlb(adev); >>> + >>> + *gpu_addr = res->start << PAGE_SHIFT; >>> + >>> +out_unreserve: >>> + amdgpu_bo_unreserve(abo); >>> + return r; >>> +} >>> + >>> /* >>> * amdgpu_ttm_recover_gart - Rebind GTT pages >>> * >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h >>> index 15e659575087..707654732759 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h >>> @@ -179,6 +179,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, >>> u64 k_job_id); >>> int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo); >>> +int amdgpu_ttm_alloc_gart_vram_bo(struct amdgpu_bo *abo, u64 *gpu_addr); >>> void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo); >>> uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t >>> type); >>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c >>> b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c >>> index c6945c842267..d96de02c6bb9 100644 >>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c >>> +++ 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c >>> @@ -148,6 +148,14 @@ static struct kfd_mem_obj *allocate_mqd(struct >>> kfd_node *node, >>> kfree(mqd_mem_obj); >>> return NULL; >>> } >>> + >>> + retval = amdgpu_ttm_alloc_gart_vram_bo(mqd_mem_obj->gtt_mem, >>> + &(mqd_mem_obj->gpu_addr)); >>> + if (retval) { >>> + amdgpu_amdkfd_free_gtt_mem(node->adev, >>> &(mqd_mem_obj->gtt_mem)); >>> + kfree(mqd_mem_obj); >>> + return NULL; >>> + } >>> } else { >>> retval = kfd_gtt_sa_allocate(node, sizeof(struct v9_mqd), >>> &mqd_mem_obj); >
