Re: [PATCH v2] drm/amdgpu: Add EXT_COHERENT support for APU and NUMA systems
On 2023-10-19 09:32, David Francis wrote: On gfx943 APU, EXT_COHERENT should give MTYPE_CC for local and MTYPE_UC for nonlocal memory. On NUMA systems, local memory gets the local mtype, set by an override callback. If EXT_COHERENT is set, memory will be set as MTYPE_UC by default, with local memory MTYPE_CC. Add an option in the override function for this case, and add a check to ensure it is not used on UNCACHED memory. Signed-off-by: David Francis --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 13 + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h| 8 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 33 +++ drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 8 +++--- 5 files changed, 42 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 46d27c87..189341944bf1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -761,6 +761,7 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence, * @immediate: immediate submission in a page fault * @unlocked: unlocked invalidation during MM callback * @flush_tlb: trigger tlb invalidation after update completed + * @allow_override: change MTYPE for local NUMA nodes * @resv: fences we need to sync to * @start: start of mapped range * @last: last mapped entry @@ -777,7 +778,7 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence, * 0 for success, negative erro code for failure. */ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, - bool immediate, bool unlocked, bool flush_tlb, + bool immediate, bool unlocked, bool flush_tlb, bool allow_override, struct dma_resv *resv, uint64_t start, uint64_t last, uint64_t flags, uint64_t offset, uint64_t vram_base, struct ttm_resource *res, dma_addr_t *pages_addr, @@ -815,6 +816,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, params.immediate = immediate; params.pages_addr = pages_addr; params.unlocked = unlocked; + params.allow_override = allow_override; /* Implicitly sync to command submissions in the same VM before * unmapping. Sync to moving fences before mapping. @@ -1062,6 +1064,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, trace_amdgpu_vm_bo_update(mapping); r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, + !(bo->flags & AMDGPU_GEM_CREATE_UNCACHED), resv, mapping->start, mapping->last, update_flags, mapping->offset, vram_base, mem, pages_addr, @@ -1257,8 +1260,8 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, mapping->start < AMDGPU_GMC_HOLE_START) init_pte_value = AMDGPU_PTE_DEFAULT_ATC; - r = amdgpu_vm_update_range(adev, vm, false, false, true, resv, - mapping->start, mapping->last, + r = amdgpu_vm_update_range(adev, vm, false, false, true, false, + resv, mapping->start, mapping->last, init_pte_value, 0, 0, NULL, NULL, ); amdgpu_vm_free_mapping(adev, vm, mapping, f); @@ -2546,8 +2549,8 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, goto error_unlock; } - r = amdgpu_vm_update_range(adev, vm, true, false, false, NULL, addr, - addr, flags, value, 0, NULL, NULL, NULL); + r = amdgpu_vm_update_range(adev, vm, true, false, false, false, + NULL, addr, addr, flags, value, 0, NULL, NULL, NULL); if (r) goto error_unlock; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 6e71978db13f..9ea8b5b644e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -244,6 +244,12 @@ struct amdgpu_vm_update_params { * @table_freed: return true if page table is freed when updating */ bool table_freed; + + /** +* @allow_override: true for memory that is not uncached: allows MTYPE +* to be overridden for NUMA local memory. +*/ + bool allow_override; }; struct amdgpu_vm_update_funcs { @@ -436,7 +442,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, struct amdgpu_vm
[PATCH v2] drm/amdgpu: Add EXT_COHERENT support for APU and NUMA systems
On gfx943 APU, EXT_COHERENT should give MTYPE_CC for local and MTYPE_UC for nonlocal memory. On NUMA systems, local memory gets the local mtype, set by an override callback. If EXT_COHERENT is set, memory will be set as MTYPE_UC by default, with local memory MTYPE_CC. Add an option in the override function for this case, and add a check to ensure it is not used on UNCACHED memory. Signed-off-by: David Francis --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 13 + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h| 8 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 33 +++ drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 8 +++--- 5 files changed, 42 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 46d27c87..189341944bf1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -761,6 +761,7 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence, * @immediate: immediate submission in a page fault * @unlocked: unlocked invalidation during MM callback * @flush_tlb: trigger tlb invalidation after update completed + * @allow_override: change MTYPE for local NUMA nodes * @resv: fences we need to sync to * @start: start of mapped range * @last: last mapped entry @@ -777,7 +778,7 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence, * 0 for success, negative erro code for failure. */ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, - bool immediate, bool unlocked, bool flush_tlb, + bool immediate, bool unlocked, bool flush_tlb, bool allow_override, struct dma_resv *resv, uint64_t start, uint64_t last, uint64_t flags, uint64_t offset, uint64_t vram_base, struct ttm_resource *res, dma_addr_t *pages_addr, @@ -815,6 +816,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, params.immediate = immediate; params.pages_addr = pages_addr; params.unlocked = unlocked; + params.allow_override = allow_override; /* Implicitly sync to command submissions in the same VM before * unmapping. Sync to moving fences before mapping. @@ -1062,6 +1064,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, trace_amdgpu_vm_bo_update(mapping); r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, + !(bo->flags & AMDGPU_GEM_CREATE_UNCACHED), resv, mapping->start, mapping->last, update_flags, mapping->offset, vram_base, mem, pages_addr, @@ -1257,8 +1260,8 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, mapping->start < AMDGPU_GMC_HOLE_START) init_pte_value = AMDGPU_PTE_DEFAULT_ATC; - r = amdgpu_vm_update_range(adev, vm, false, false, true, resv, - mapping->start, mapping->last, + r = amdgpu_vm_update_range(adev, vm, false, false, true, false, + resv, mapping->start, mapping->last, init_pte_value, 0, 0, NULL, NULL, ); amdgpu_vm_free_mapping(adev, vm, mapping, f); @@ -2546,8 +2549,8 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, goto error_unlock; } - r = amdgpu_vm_update_range(adev, vm, true, false, false, NULL, addr, - addr, flags, value, 0, NULL, NULL, NULL); + r = amdgpu_vm_update_range(adev, vm, true, false, false, false, + NULL, addr, addr, flags, value, 0, NULL, NULL, NULL); if (r) goto error_unlock; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 6e71978db13f..9ea8b5b644e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -244,6 +244,12 @@ struct amdgpu_vm_update_params { * @table_freed: return true if page table is freed when updating */ bool table_freed; + + /** +* @allow_override: true for memory that is not uncached: allows MTYPE +* to be overridden for NUMA local memory. +*/ + bool allow_override; }; struct amdgpu_vm_update_funcs { @@ -436,7 +442,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, struct amdgpu_vm *vm, struct amdgpu_bo *bo); int