On 2025-12-15 10:20, Christian König wrote:
> On 12/10/25 00:43, Philip Yang wrote:
>> MQD BO on VRAM accessed via the FB aperture is mtype UC (uncached);
>> map it to GART as mtype RW (cached) to reduce queue switch latency.
>>
>> Add a GART mm_node to the kfd mem obj to free the GART entries after
>> the MQD mem obj is freed. Use a resource cursor to handle the VRAM
>> resource, which may be on multiple blocks, and use cursor_gart to
>> handle the GART entries.
>>
>> Signed-off-by: Philip Yang <[email protected]>
>> ---
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c      | 89 +++++++++++++++++++
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h      |  4 +-
>>  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c |  2 +
>>  .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c  |  9 ++
>>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h        |  1 +
>>  5 files changed, 104 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> index 4f8bc7f35cdc..ae4f60aeed14 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> @@ -880,6 +880,62 @@ static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev,
>>  	}
>>  }
>>
>> +static void amdgpu_ttm_gart_bind_gfx9_mqd_vram(struct amdgpu_device *adev,
>> +					       struct ttm_buffer_object *tbo,
>> +					       struct drm_mm_node *mm_node,
>> +					       uint64_t flags)
>> +{
>> +	uint64_t total_pages;
>> +	int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp);
>> +	uint64_t page_idx, pages_per_xcc;
>> +	struct amdgpu_res_cursor cursor_gart;
>> +	struct amdgpu_res_cursor cursor;
>> +	uint64_t ctrl_flags = flags;
>> +	int i;
>> +
>> +	total_pages = tbo->resource->size >> PAGE_SHIFT;
>
> Please use tbo->base.size instead.

done
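
i.e., for v2, presumably just:

-	total_pages = tbo->resource->size >> PAGE_SHIFT;
+	total_pages = tbo->base.size >> PAGE_SHIFT;
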
> And it would be nicer if the calculation was in bytes and not pages,
> but not a must have.

The tbo resource and its cursor start are in bytes, while the GART
entries and their cursor start are in pages; it would be too many
changes to make drm mm_node use a byte-based start.
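
To make the unit mismatch concrete, here is a minimal user-space
sketch (all sketch_ names are illustrative, not the amdgpu_res_cursor
API) of the two cursors advancing in lockstep, one in pages and one in
bytes — this is why the patch pairs amdgpu_res_next(&cursor_gart, n)
with amdgpu_res_next(&cursor, n * PAGE_SIZE):

	#include <stdint.h>
	#include <stdio.h>

	#define SKETCH_PAGE_SHIFT 12
	#define SKETCH_PAGE_SIZE  (1ULL << SKETCH_PAGE_SHIFT)

	struct sketch_cursor {
		uint64_t start;	/* bytes for VRAM, pages for GART */
		uint64_t size;	/* remaining, same unit as start */
	};

	static void sketch_next(struct sketch_cursor *cur, uint64_t amount)
	{
		cur->start += amount;
		cur->size  -= amount;
	}

	int main(void)
	{
		/* 8 pages of VRAM starting at byte offset 0x100000 */
		struct sketch_cursor vram = { 0x100000, 8 * SKETCH_PAGE_SIZE };
		/* the matching GART allocation starting at page 64 */
		struct sketch_cursor gart = { 64, 8 };

		while (gart.size) {
			printf("GART page %llu <- VRAM pa 0x%llx\n",
			       (unsigned long long)gart.start,
			       (unsigned long long)vram.start);
			/* one page forward: GART by 1, VRAM by PAGE_SIZE */
			sketch_next(&gart, 1);
			sketch_next(&vram, SKETCH_PAGE_SIZE);
		}
		return 0;
	}
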
>> +
>> +	amdgpu_gmc_get_vm_pte(adev, NULL, NULL, AMDGPU_VM_MTYPE_NC, &ctrl_flags);
>> +
>> +	if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 3))
>> +		amdgpu_gmc_get_vm_pte(adev, NULL, NULL, AMDGPU_VM_MTYPE_RW, &flags);
>> +
>> +	pages_per_xcc = total_pages;
>> +	do_div(pages_per_xcc, num_xcc);
>> +
>> +	amdgpu_res_first(NULL, mm_node->start, total_pages, &cursor_gart);
>> +	amdgpu_res_first(tbo->resource, 0, tbo->resource->size, &cursor);
>> +
>> +	for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += pages_per_xcc) {
>> +		u64 start_page;
>> +		u64 npages, n;
>> +		u64 pa;
>> +
>> +		start_page = cursor_gart.start;
>> +		pa = cursor.start + adev->vm_manager.vram_base_offset;
>> +		n = 1;
>> +		amdgpu_gart_map_vram_range(adev, pa, start_page, n,
>> +					   flags, NULL);
>> +
>> +		npages = pages_per_xcc - 1;
>> +		while (npages) {
>> +			amdgpu_res_next(&cursor_gart, n);
>> +			amdgpu_res_next(&cursor, n * PAGE_SIZE);
>> +
>> +			start_page = cursor_gart.start;
>> +			pa = cursor.start + adev->vm_manager.vram_base_offset;
>> +			n = min3(cursor.size / PAGE_SIZE, cursor_gart.size, npages);
>> +
>> +			amdgpu_gart_map_vram_range(adev, pa, start_page, n,
>> +						   ctrl_flags, NULL);
>> +
>> +			npages -= n;
>> +		}
>> +		amdgpu_res_next(&cursor_gart, n);
>> +		amdgpu_res_next(&cursor, n * PAGE_SIZE);
>> +	}
>> +}
>> +
>>  static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
>>  				 struct ttm_buffer_object *tbo,
>>  				 uint64_t flags)
>> @@ -1017,6 +1073,39 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
>>  	return 0;
>>  }
>>
>> +/*
>> + * amdgpu_ttm_alloc_gart_vram_bo - Bind VRAM pages to GART mapping
>> + *
>> + * call amdgpu_ttm_alloc_gart_entries to alloc GART dynamically
>> + */
>> +int amdgpu_ttm_alloc_gart_vram_bo(struct amdgpu_bo *abo,
>> +				  struct drm_mm_node *mm_node,
>> +				  u64 *gpu_addr)
>> +{
>> +	struct ttm_buffer_object *bo = &abo->tbo;
>> +	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
>> +	uint64_t flags;
>> +	int r;
>> +
>> +	/* Only for valid VRAM bo resource */
>> +	if (bo->resource->start == AMDGPU_BO_INVALID_OFFSET)
>> +		return 0;
>
> Please drop that check. We really shouldn't touch bo->resource->start
> any more.

How about this check instead, so that an MQD on GTT for other ASICs,
which is already mapped correctly, is skipped:

	if (amdgpu_mem_type_to_domain(bo->resource->mem_type) !=
	    AMDGPU_GEM_DOMAIN_VRAM)
		return 0;
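
For context, amdgpu_mem_type_to_domain() maps the TTM placement back
to a GEM domain, so the early-out leaves non-VRAM placements on their
existing mapping. A minimal user-space model of that early-out (all
model_ names are hypothetical, not the real amdgpu/TTM enums):

	#include <stdio.h>

	enum model_mem_type { MODEL_PL_SYSTEM, MODEL_PL_TT, MODEL_PL_VRAM };
	enum model_domain { MODEL_DOMAIN_CPU = 1, MODEL_DOMAIN_GTT = 2,
			    MODEL_DOMAIN_VRAM = 4 };

	static enum model_domain model_mem_type_to_domain(enum model_mem_type t)
	{
		switch (t) {
		case MODEL_PL_VRAM: return MODEL_DOMAIN_VRAM;
		case MODEL_PL_TT:   return MODEL_DOMAIN_GTT;
		default:            return MODEL_DOMAIN_CPU;
		}
	}

	static int model_alloc_gart_vram_bo(enum model_mem_type mem_type)
	{
		/* MQD on GTT is already mapped correctly: skip it */
		if (model_mem_type_to_domain(mem_type) != MODEL_DOMAIN_VRAM)
			return 0;

		printf("VRAM placement: alloc GART entries and remap\n");
		return 0;
	}

	int main(void)
	{
		model_alloc_gart_vram_bo(MODEL_PL_TT);   /* early-out */
		model_alloc_gart_vram_bo(MODEL_PL_VRAM); /* remap path */
		return 0;
	}
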
> Apart from that looks reasonable to me, but I'm wondering if GART
> re-creation after GPU recovery will still work or not.

The color parameter is removed, so the GPU recovery path (gtt recover)
is not affected.

Regards,
Philip

> @Pierre-Eric could you double check that?
>
> Regards,
> Christian.
>
>> +
>> +	r = amdgpu_gtt_mgr_alloc_entries(&adev->mman.gtt_mgr, mm_node,
>> +					 amdgpu_bo_ngpu_pages(abo),
>> +					 0, 0, 0);
>> +	if (r)
>> +		return r;
>> +
>> +	/* compute PTE flags for this buffer object */
>> +	flags = amdgpu_ttm_tt_pte_flags(adev, NULL, bo->resource);
>> +	amdgpu_ttm_gart_bind_gfx9_mqd_vram(adev, bo, mm_node, flags);
>> +	amdgpu_gart_invalidate_tlb(adev);
>> +
>> +	*gpu_addr = mm_node->start << PAGE_SHIFT;
>> +	return 0;
>> +}
>> +
>>  /*
>>   * amdgpu_ttm_recover_gart - Rebind GTT pages
>>   *
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> index 28511e66d364..a8b8a541e21b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> @@ -140,7 +140,6 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev);
>>  bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *mem);
>>  void amdgpu_gtt_mgr_recover(struct amdgpu_gtt_mgr *mgr);
>>  int amdgpu_gtt_mgr_alloc_entries(struct amdgpu_gtt_mgr *mgr,
>>  				 struct drm_mm_node *node, u64 num_pages,
>>  				 u64 alignment,
>> @@ -192,6 +191,9 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity, u64 k_job_id);
>>  int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
>> +int amdgpu_ttm_alloc_gart_vram_bo(struct amdgpu_bo *abo,
>> +				  struct drm_mm_node *mm_node,
>> +				  u64 *gpu_addr);
>>  void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
>>  uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type);
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
>> index f78b249e1a41..edb72f4ef82d 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
>> @@ -225,6 +225,8 @@ void kfd_free_mqd_cp(struct mqd_manager *mm, void *mqd,
>>  		     struct kfd_mem_obj *mqd_mem_obj)
>>  {
>>  	if (mqd_mem_obj->mem) {
>> +		amdgpu_gtt_mgr_free_entries(&mm->dev->adev->mman.gtt_mgr,
>> +					    &mqd_mem_obj->mm_node);
>>  		amdgpu_amdkfd_free_kernel_mem(mm->dev->adev, &mqd_mem_obj->mem);
>>  		kfree(mqd_mem_obj);
>>  	} else {
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
>> index 14123e1a9716..5828220056bd 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
>> @@ -148,6 +148,15 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_node *node,
>>  			kfree(mqd_mem_obj);
>>  			return NULL;
>>  		}
>> +
>> +		retval = amdgpu_ttm_alloc_gart_vram_bo(mqd_mem_obj->mem,
>> +						       &mqd_mem_obj->mm_node,
>> +						       &(mqd_mem_obj->gpu_addr));
>> +		if (retval) {
>> +			amdgpu_amdkfd_free_kernel_mem(node->adev, &(mqd_mem_obj->mem));
>> +			kfree(mqd_mem_obj);
>> +			return NULL;
>> +		}
>>  	} else {
>>  		retval = kfd_gtt_sa_allocate(node, sizeof(struct v9_mqd),
>>  					     &mqd_mem_obj);
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> index 29419b3249cf..fdde907836fb 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> @@ -252,6 +252,7 @@ struct kfd_mem_obj {
>>  	uint64_t gpu_addr;
>>  	uint32_t *cpu_ptr;
>>  	void *mem;
>> +	struct drm_mm_node mm_node;
>>  };
>>
>>  struct kfd_vmid_info {
