On 12/1/25 15:28, Philip Yang wrote:
> An MQD BO in VRAM is accessed via the FB aperture with mtype UC
> (uncached); map it through GART with mtype RW (cached) instead, to
> reduce queue switch latency.
>
> A TTM BO only has one resource, so add a GART resource to amdgpu_bo in
> order to support a dynamic GART resource alongside the VRAM BO
> resource.
>
> Update amdgpu_ttm_gart_bind_gfx9_mqd to map the MQD in either system
> memory or VRAM.
>
> Add the helper amdgpu_ttm_alloc_gart_vram_bo to allocate a GART
> entries resource for the MQD bo->gart_res and bind it to the GART
> mapping.
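For reference, the mapping change being proposed comes down to the MTYPE
encoded into the GART PTEs. A rough sketch with the macros as used in the
patch below; the base 'flags' value is assumed to come from
amdgpu_ttm_tt_pte_flags(), and the ordering follows the hunk in
amdgpu_ttm_gart_bind_gfx9_mqd:

	/* Ctrl stack pages: non-coherent */
	uint64_t ctrl_flags = AMDGPU_PTE_MTYPE_VG10(flags, AMDGPU_MTYPE_NC);

	/* MQD page: cached read/write, instead of UC through the FB aperture */
	flags = AMDGPU_PTE_MTYPE_VG10(flags, AMDGPU_MTYPE_RW);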
Clear NAK to that approach! That would completely confuse TTM.

We need to talk about that at the weekly meeting first.

Regards,
Christian.

>
> Signed-off-by: Philip Yang <[email protected]>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c  |   3 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h  |   1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c     | 113 +++++++++++++++---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h     |   1 +
>  .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c |   8 ++
>  5 files changed, 108 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 926a3f09a776..d267456cd181 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -1297,6 +1297,9 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
>
>  	abo = ttm_to_amdgpu_bo(bo);
>
> +	if (abo->gart_res)
> +		ttm_resource_free(bo, &abo->gart_res);
> +
>  	WARN_ON(abo->vm_bo);
>
>  	if (abo->kfd_bo)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> index 52c2d1731aab..a412f5ec2a09 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> @@ -106,6 +106,7 @@ struct amdgpu_bo {
>  	struct ttm_place		placements[AMDGPU_BO_MAX_PLACEMENTS];
>  	struct ttm_placement		placement;
>  	struct ttm_buffer_object	tbo;
> +	struct ttm_resource		*gart_res;
>  	struct ttm_bo_kmap_obj		kmap;
>  	u64				flags;
>  	/* per VM structure for page tables and with virtual addresses */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 5f58cff2c28b..1d8f5fc66acc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -832,14 +832,27 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev,
>   * Ctrl stack and modify their memory type to NC.
>   */
>  static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev,
> -					   struct ttm_tt *ttm, uint64_t flags)
> +					   struct ttm_buffer_object *tbo,
> +					   uint64_t flags)
>  {
> +	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo);
> +	struct ttm_tt *ttm = tbo->ttm;
>  	struct amdgpu_ttm_tt *gtt = (void *)ttm;
> -	uint64_t total_pages = ttm->num_pages;
> +	uint64_t total_pages;
>  	int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp);
>  	uint64_t page_idx, pages_per_xcc;
> -	int i;
>  	uint64_t ctrl_flags = AMDGPU_PTE_MTYPE_VG10(flags, AMDGPU_MTYPE_NC);
> +	int i;
> +
> +	if (!ttm && !abo->gart_res)
> +		return;
> +
> +	if (ttm) {
> +		total_pages = ttm->num_pages;
> +	} else {
> +		WARN_ON_ONCE(abo->gart_res->size != tbo->resource->size);
> +		total_pages = (abo->gart_res->size) >> PAGE_SHIFT;
> +	}
>
>  	flags = AMDGPU_PTE_MTYPE_VG10(flags, AMDGPU_MTYPE_RW);
>
> @@ -847,19 +860,33 @@ static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev,
>  	do_div(pages_per_xcc, num_xcc);
>
>  	for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += pages_per_xcc) {
> -		/* MQD page: use default flags */
> -		amdgpu_gart_bind(adev,
> -				 gtt->offset + (page_idx << PAGE_SHIFT),
> -				 1, &gtt->ttm.dma_address[page_idx], flags);
> -		/*
> -		 * Ctrl pages - modify the memory type to NC (ctrl_flags) from
> -		 * the second page of the BO onward.
> -		 */
> -		amdgpu_gart_bind(adev,
> -				 gtt->offset + ((page_idx + 1) << PAGE_SHIFT),
> -				 pages_per_xcc - 1,
> -				 &gtt->ttm.dma_address[page_idx + 1],
> -				 ctrl_flags);
> +		if (ttm) {
> +			/* MQD page: use default flags */
> +			amdgpu_gart_bind(adev,
> +					 gtt->offset + (page_idx << PAGE_SHIFT),
> +					 1, &gtt->ttm.dma_address[page_idx],
> +					 flags);
> +			/*
> +			 * Ctrl pages - modify the memory type to NC
> +			 * (ctrl_flags) from the second page of the BO onward.
> +			 */
> +			amdgpu_gart_bind(adev,
> +					 gtt->offset + ((page_idx + 1) << PAGE_SHIFT),
> +					 pages_per_xcc - 1,
> +					 &gtt->ttm.dma_address[page_idx + 1],
> +					 ctrl_flags);
> +		} else {
> +			u64 pa = (tbo->resource->start + page_idx) << PAGE_SHIFT;
> +			u64 start_page = abo->gart_res->start + page_idx;
> +
> +			pa += adev->vm_manager.vram_base_offset;
> +			amdgpu_gart_map_vram_range(adev, pa, start_page, 1,
> +						   flags, NULL);
> +
> +			amdgpu_gart_map_vram_range(adev, pa + PAGE_SIZE,
> +						   start_page + 1,
> +						   pages_per_xcc - 1,
> +						   ctrl_flags, NULL);
> +		}
>  	}
>  }
>
> @@ -875,12 +902,14 @@ static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
>  		flags |= AMDGPU_PTE_TMZ;
>
>  	if (abo->flags & AMDGPU_GEM_CREATE_CP_MQD_GFX9) {
> -		amdgpu_ttm_gart_bind_gfx9_mqd(adev, ttm, flags);
> +		amdgpu_ttm_gart_bind_gfx9_mqd(adev, tbo, flags);
>  	} else {
>  		amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
>  				 gtt->ttm.dma_address, flags);
>  	}
> -	gtt->bound = true;
> +
> +	if (ttm)
> +		gtt->bound = true;
>  }
>
>  /*
> @@ -1000,6 +1029,54 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
>  	return 0;
>  }
>
> +int amdgpu_ttm_alloc_gart_vram_bo(struct amdgpu_bo *abo,
> +				  u64 *gpu_addr)
> +{
> +	struct ttm_buffer_object *bo = &abo->tbo;
> +	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
> +	struct ttm_operation_ctx ctx = { false, false };
> +	struct ttm_placement placement;
> +	struct ttm_place placements;
> +	struct ttm_resource *res;
> +	uint64_t flags;
> +	int r;
> +
> +	/* Only for valid VRAM bo resource */
> +	if (bo->resource->start == AMDGPU_BO_INVALID_OFFSET)
> +		return 0;
> +
> +	r = amdgpu_bo_reserve(abo, false);
> +	if (unlikely(r))
> +		return r;
> +
> +	/* allocate GART space */
> +	placement.num_placement = 1;
> +	placement.placement = &placements;
> +	placements.fpfn = 0;
> +	placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
> +	placements.mem_type = TTM_PL_TT;
> +	placements.flags = bo->resource->placement;
> +
> +	r = ttm_bo_mem_space(bo, &placement, &res, &ctx);
> +	if (unlikely(r))
> +		goto out_unreserve;
> +
> +	/* compute PTE flags for this buffer object */
> +	flags = amdgpu_ttm_tt_pte_flags(adev, NULL, bo->resource);
> +
> +	/* Bind VRAM pages */
> +	abo->gart_res = res;
> +
> +	amdgpu_ttm_gart_bind(adev, bo, flags);
> +	amdgpu_gart_invalidate_tlb(adev);
> +
> +	*gpu_addr = res->start << PAGE_SHIFT;
> +
> +out_unreserve:
> +	amdgpu_bo_unreserve(abo);
> +	return r;
> +}
> +
>  /*
>   * amdgpu_ttm_recover_gart - Rebind GTT pages
>   *
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index 15e659575087..707654732759 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -179,6 +179,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>  			u64 k_job_id);
>
>  int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
> +int amdgpu_ttm_alloc_gart_vram_bo(struct amdgpu_bo *abo, u64 *gpu_addr);
>  void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
>  uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type);
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
> index c6945c842267..d96de02c6bb9 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
> @@ -148,6 +148,14 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_node *node,
>  			kfree(mqd_mem_obj);
>  			return NULL;
>  		}
> +
> +		retval = amdgpu_ttm_alloc_gart_vram_bo(mqd_mem_obj->gtt_mem,
> +						       &(mqd_mem_obj->gpu_addr));
> +		if (retval) {
> +			amdgpu_amdkfd_free_gtt_mem(node->adev, &(mqd_mem_obj->gtt_mem));
> +			kfree(mqd_mem_obj);
> +			return NULL;
> +		}
>  	} else {
>  		retval = kfd_gtt_sa_allocate(node, sizeof(struct v9_mqd),
>  					     &mqd_mem_obj);

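For illustration, a minimal sketch of how the new helper is meant to be
called, based on the allocate_mqd() hunk above ('bo' is a hypothetical
stand-in for mqd_mem_obj->gtt_mem, i.e. a VRAM amdgpu_bo created with
AMDGPU_GEM_CREATE_CP_MQD_GFX9; error handling is trimmed):

	u64 gart_addr;
	int r;

	/* Allocates a TTM_PL_TT resource into bo->gart_res, binds the BO's
	 * VRAM pages into GART with the MQD/ctrl-stack mtypes and returns
	 * the GART address of the mapping. */
	r = amdgpu_ttm_alloc_gart_vram_bo(bo, &gart_addr);
	if (r)
		return r;

	/* MQD accesses now go through the cached GART mapping instead of
	 * the uncached FB aperture. */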