Re: [PATCH v3] drm/amdgpu: Add EXT_COHERENCE memory allocation flags

2023-07-27 Thread Felix Kuehling
In amdgpu_dma_buf_create_obj we copy the coherence-related flags to the 
SG BO that's used to attach the BO to the importer device. You need to 
add the new flag to the list.


Some more nit-picks inline.

Am 2023-07-26 um 09:34 schrieb David Francis:

These flags (for GEM and SVM allocations) allocate
memory that allows for system-scope atomic semantics.

On GFX943 these flags cause caches to be avoided on
non-local memory.

On all other ASICs they are identical in functionality to the
equivalent COHERENT flags.

Corresponding Thunk patch is at
https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/pull/88

Signed-off-by: David Francis 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c |  2 ++
  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c   |  1 +
  drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c   |  1 +
  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c|  5 -
  drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 10 +-
  include/uapi/drm/amdgpu_drm.h|  7 +++
  include/uapi/linux/kfd_ioctl.h   |  3 +++
  7 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index d34c3ef8f3ed..7f23bc0ee592 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1738,6 +1738,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
  
  	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT)

alloc_flags |= AMDGPU_GEM_CREATE_COHERENT;
+   if (flags & KFD_IOC_ALLOC_MEM_FLAGS_EXT_COHERENCE)
+   alloc_flags |= AMDGPU_GEM_CREATE_EXT_COHERENCE;
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED)
alloc_flags |= AMDGPU_GEM_CREATE_UNCACHED;
  
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c

index 6b430e10d38e..8e951688668b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -632,6 +632,7 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev,
}
  
  	if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT |

+  AMDGPU_GEM_CREATE_EXT_COHERENCE |
   AMDGPU_GEM_CREATE_UNCACHED))
*flags = (*flags & ~AMDGPU_PTE_MTYPE_NV10_MASK) |
 AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index a6ee0220db56..ff330c7c0232 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -540,6 +540,7 @@ static void gmc_v11_0_get_vm_pte(struct amdgpu_device *adev,
}
  
  	if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT |

+  AMDGPU_GEM_CREATE_EXT_COHERENCE |
   AMDGPU_GEM_CREATE_UNCACHED))
*flags = (*flags & ~AMDGPU_PTE_MTYPE_NV10_MASK) |
 AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 880460cd3239..e40fcfc1a3f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1183,7 +1183,8 @@ static void gmc_v9_0_get_coherence_flags(struct 
amdgpu_device *adev,
  {
struct amdgpu_device *bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
bool is_vram = bo->tbo.resource->mem_type == TTM_PL_VRAM;
-   bool coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT;
+   bool coherent = bo->flags & (AMDGPU_GEM_CREATE_COHERENT | 
AMDGPU_GEM_CREATE_EXT_COHERENCE);
+   bool ext_coherence = bo->flags & AMDGPU_GEM_CREATE_EXT_COHERENCE;
bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED;
struct amdgpu_vm *vm = mapping->bo_va->base.vm;
unsigned int mtype_local, mtype;
@@ -1251,6 +1252,8 @@ static void gmc_v9_0_get_coherence_flags(struct 
amdgpu_device *adev,
snoop = true;
if (uncached) {
mtype = MTYPE_UC;
+   } else if (ext_coherence) {
+   mtype = is_local ? MTYPE_CC : MTYPE_UC;
} else if (adev->flags & AMD_IS_APU) {
mtype = is_local ? mtype_local : MTYPE_NC;
} else {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 1b50eae051a4..28304b93a990 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1155,7 +1155,8 @@ svm_range_get_pte_flags(struct kfd_node *node,
uint32_t mapping_flags = 0;
uint64_t pte_flags;
bool snoop = (domain != SVM_RANGE_VRAM_DOMAIN);
-   bool coherent = flags & KFD_IOCTL_SVM_FLAG_COHERENT;
+   bool coherent = flags & (KFD_IOCTL_SVM_FLAG_COHERENT | 
KFD_IOCTL_SVM_FLAG_EXT_COHERENCE);
+   bool ext_coherence = flags & 

[PATCH v3] drm/amdgpu: Add EXT_COHERENCE memory allocation flags

2023-07-26 Thread David Francis
These flags (for GEM and SVM allocations) allocate
memory that allows for system-scope atomic semantics.

On GFX943 these flags cause caches to be avoided on
non-local memory.

On all other ASICs they are identical in functionality to the
equivalent COHERENT flags.

Corresponding Thunk patch is at
https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/pull/88

Signed-off-by: David Francis 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c |  2 ++
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c   |  1 +
 drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c   |  1 +
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c|  5 -
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 10 +-
 include/uapi/drm/amdgpu_drm.h|  7 +++
 include/uapi/linux/kfd_ioctl.h   |  3 +++
 7 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index d34c3ef8f3ed..7f23bc0ee592 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1738,6 +1738,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT)
alloc_flags |= AMDGPU_GEM_CREATE_COHERENT;
+   if (flags & KFD_IOC_ALLOC_MEM_FLAGS_EXT_COHERENCE)
+   alloc_flags |= AMDGPU_GEM_CREATE_EXT_COHERENCE;
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED)
alloc_flags |= AMDGPU_GEM_CREATE_UNCACHED;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 6b430e10d38e..8e951688668b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -632,6 +632,7 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev,
}
 
if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
+  AMDGPU_GEM_CREATE_EXT_COHERENCE |
   AMDGPU_GEM_CREATE_UNCACHED))
*flags = (*flags & ~AMDGPU_PTE_MTYPE_NV10_MASK) |
 AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index a6ee0220db56..ff330c7c0232 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -540,6 +540,7 @@ static void gmc_v11_0_get_vm_pte(struct amdgpu_device *adev,
}
 
if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
+  AMDGPU_GEM_CREATE_EXT_COHERENCE |
   AMDGPU_GEM_CREATE_UNCACHED))
*flags = (*flags & ~AMDGPU_PTE_MTYPE_NV10_MASK) |
 AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 880460cd3239..e40fcfc1a3f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1183,7 +1183,8 @@ static void gmc_v9_0_get_coherence_flags(struct 
amdgpu_device *adev,
 {
struct amdgpu_device *bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
bool is_vram = bo->tbo.resource->mem_type == TTM_PL_VRAM;
-   bool coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT;
+   bool coherent = bo->flags & (AMDGPU_GEM_CREATE_COHERENT | 
AMDGPU_GEM_CREATE_EXT_COHERENCE);
+   bool ext_coherence = bo->flags & AMDGPU_GEM_CREATE_EXT_COHERENCE;
bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED;
struct amdgpu_vm *vm = mapping->bo_va->base.vm;
unsigned int mtype_local, mtype;
@@ -1251,6 +1252,8 @@ static void gmc_v9_0_get_coherence_flags(struct 
amdgpu_device *adev,
snoop = true;
if (uncached) {
mtype = MTYPE_UC;
+   } else if (ext_coherence) {
+   mtype = is_local ? MTYPE_CC : MTYPE_UC;
} else if (adev->flags & AMD_IS_APU) {
mtype = is_local ? mtype_local : MTYPE_NC;
} else {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 1b50eae051a4..28304b93a990 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1155,7 +1155,8 @@ svm_range_get_pte_flags(struct kfd_node *node,
uint32_t mapping_flags = 0;
uint64_t pte_flags;
bool snoop = (domain != SVM_RANGE_VRAM_DOMAIN);
-   bool coherent = flags & KFD_IOCTL_SVM_FLAG_COHERENT;
+   bool coherent = flags & (KFD_IOCTL_SVM_FLAG_COHERENT | 
KFD_IOCTL_SVM_FLAG_EXT_COHERENCE);
+   bool ext_coherence = flags & KFD_IOCTL_SVM_FLAG_EXT_COHERENCE;
bool uncached = false; /*flags & KFD_IOCTL_SVM_FLAG_UNCACHED;*/
unsigned int mtype_local;
 
@@ -1203,6 +1204,13 @@ svm_range_get_pte_flags(struct kfd_node *node,
snoop = true;
if (uncached) {