Re: [PATCH v2] drm/amdgpu: Add EXT_COHERENT support for APU and NUMA systems

2023-10-19 Thread Felix Kuehling




[PATCH v2] drm/amdgpu: Add EXT_COHERENT support for APU and NUMA systems

2023-10-19 Thread David Francis
On gfx943 APUs, EXT_COHERENT should give MTYPE_CC for local and
MTYPE_UC for non-local memory.

On NUMA systems, local memory gets the local mtype, set by an
override callback. If EXT_COHERENT is set, memory is mapped as
MTYPE_UC by default, with local memory mapped as MTYPE_CC.

Add an option to the override callback for this case, and
add a check to ensure it is not applied to UNCACHED memory.
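
As a rough, standalone illustration of the mapping policy described
above (not part of the patch; the real decision lives in the gmc_v9_0.c
MTYPE override callback, whose hunk is cut off in this archive copy,
and the identifiers below are made up for the example):

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins for the hardware MTYPE encodings. */
enum example_mtype { EX_MTYPE_NC = 0, EX_MTYPE_UC = 1, EX_MTYPE_CC = 2 };

/*
 * Without EXT_COHERENT the default cacheable type (NC) is kept; with
 * EXT_COHERENT, APU/NUMA-local memory is mapped cache-coherent (CC)
 * and everything else is mapped uncached (UC).
 */
static enum example_mtype pick_mtype(bool ext_coherent, bool is_local)
{
	if (!ext_coherent)
		return EX_MTYPE_NC;
	return is_local ? EX_MTYPE_CC : EX_MTYPE_UC;
}

int main(void)
{
	printf("EXT_COHERENT, local:  %d (CC)\n", pick_mtype(true, true));
	printf("EXT_COHERENT, remote: %d (UC)\n", pick_mtype(true, false));
	printf("default:              %d (NC)\n", pick_mtype(false, true));
	return 0;
}

In the patch itself this choice is gated by the new allow_override
flag, which amdgpu_vm_bo_update() sets to
!(bo->flags & AMDGPU_GEM_CREATE_UNCACHED), so UNCACHED buffers are
never upgraded.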

Signed-off-by: David Francis 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 13 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h|  8 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 33 +++
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c  |  8 +++---
 5 files changed, 42 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 46d27c87..189341944bf1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -761,6 +761,7 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
  * @immediate: immediate submission in a page fault
  * @unlocked: unlocked invalidation during MM callback
  * @flush_tlb: trigger tlb invalidation after update completed
+ * @allow_override: change MTYPE for local NUMA nodes
  * @resv: fences we need to sync to
  * @start: start of mapped range
  * @last: last mapped entry
@@ -777,7 +778,7 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
  * 0 for success, negative error code for failure.
  */
 int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-  bool immediate, bool unlocked, bool flush_tlb,
+  bool immediate, bool unlocked, bool flush_tlb, bool allow_override,
   struct dma_resv *resv, uint64_t start, uint64_t last,
   uint64_t flags, uint64_t offset, uint64_t vram_base,
   struct ttm_resource *res, dma_addr_t *pages_addr,
@@ -815,6 +816,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
params.immediate = immediate;
params.pages_addr = pages_addr;
params.unlocked = unlocked;
+   params.allow_override = allow_override;
 
/* Implicitly sync to command submissions in the same VM before
 * unmapping. Sync to moving fences before mapping.
@@ -1062,6 +1064,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
trace_amdgpu_vm_bo_update(mapping);
 
r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb,
+  !(bo->flags & AMDGPU_GEM_CREATE_UNCACHED),
   resv, mapping->start, mapping->last,
   update_flags, mapping->offset,
   vram_base, mem, pages_addr,
@@ -1257,8 +1260,8 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
mapping->start < AMDGPU_GMC_HOLE_START)
init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
 
-   r = amdgpu_vm_update_range(adev, vm, false, false, true, resv,
-  mapping->start, mapping->last,
+   r = amdgpu_vm_update_range(adev, vm, false, false, true, false,
+  resv, mapping->start, mapping->last,
   init_pte_value, 0, 0, NULL, NULL,
   &f);
amdgpu_vm_free_mapping(adev, vm, mapping, f);
@@ -2546,8 +2549,8 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
goto error_unlock;
}
 
-   r = amdgpu_vm_update_range(adev, vm, true, false, false, NULL, addr,
-  addr, flags, value, 0, NULL, NULL, NULL);
+   r = amdgpu_vm_update_range(adev, vm, true, false, false, false,
+  NULL, addr, addr, flags, value, 0, NULL, NULL, NULL);
if (r)
goto error_unlock;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 6e71978db13f..9ea8b5b644e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -244,6 +244,12 @@ struct amdgpu_vm_update_params {
 * @table_freed: return true if page table is freed when updating
 */
bool table_freed;
+
+   /**
+* @allow_override: true for memory that is not uncached: allows MTYPE
+* to be overridden for NUMA local memory.
+*/
+   bool allow_override;
 };
 
 struct amdgpu_vm_update_funcs {
@@ -436,7 +442,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
 void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
struct amdgpu_vm *vm, struct amdgpu_bo *bo);
 int