Re: [PATCH 4/4] drm/amdgpu: Use optimal mtypes and PTE bits for Arcturus

2019-09-02 Thread Koenig, Christian
Yeah, get_pte_flags was really added in the wrong place. We should have 
rather just mapped the MTYPE ASIC generation defendant.

I'm going to clean that up,
Christian.

Am 30.08.19 um 22:59 schrieb Liu, Shaoyun:
> Serials are reviewed by :  shaoyunl 
>
> Looks like a little bit confusing that we have  two place for the pte
> flags .  get_pte_flags  already get asic specific mapping flags  and
> inside amdgpu_vm_bo_split_mapping , driver adjust the real HW mapping
> flags again .  Maybe  better just keep the logic in one place in the
> future .
>
>
> Regards
>
> shaoyun.liu
>
>
>
> On 2019-08-26 7:07 p.m., Kuehling, Felix wrote:
>> For compute VRAM allocations on Arturus use the new RW mtype
>> for non-coherent local memory, CC mtype for coherent local
>> memory and PTE_SNOOPED bit for invalidating non-dirty cache
>> lines on remote XGMI mappings.
>>
>> Signed-off-by: Felix Kuehling 
>> ---
>>.../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 20 +--
>>drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c|  4 
>>2 files changed, 22 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> index 1b7340a18f67..c5c18e292ae3 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> @@ -357,6 +357,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct 
>> amdgpu_sync *sync)
>>
>>static uint32_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem 
>> *mem)
>>{
>> +struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
>>  bool coherent = mem->alloc_flags & ALLOC_MEM_FLAGS_COHERENT;
>>  uint32_t mapping_flags;
>>
>> @@ -366,8 +367,23 @@ static uint32_t get_pte_flags(struct amdgpu_device 
>> *adev, struct kgd_mem *mem)
>>  if (mem->alloc_flags & ALLOC_MEM_FLAGS_EXECUTABLE)
>>  mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
>>
>> -mapping_flags |= coherent ?
>> -AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
>> +switch (adev->asic_type) {
>> +case CHIP_ARCTURUS:
>> +if (mem->alloc_flags & ALLOC_MEM_FLAGS_VRAM) {
>> +if (bo_adev == adev)
>> +mapping_flags |= coherent ?
>> +AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
>> +else
>> +mapping_flags |= AMDGPU_VM_MTYPE_UC;
>> +} else {
>> +mapping_flags |= coherent ?
>> +AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
>> +}
>> +break;
>> +default:
>> +mapping_flags |= coherent ?
>> +AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
>> +}
>>
>>  return amdgpu_gmc_get_pte_flags(adev, mapping_flags);
>>}
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 7ddca3eeb6cf..189ad5699946 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -1592,6 +1592,10 @@ static int amdgpu_vm_bo_split_mapping(struct 
>> amdgpu_device *adev,
>>  }
>>  flags &= ~AMDGPU_PTE_VALID;
>>  }
>> +if (adev->asic_type == CHIP_ARCTURUS &&
>> +!(flags & AMDGPU_PTE_SYSTEM) &&
>> +mapping->bo_va->is_xgmi)
>> +flags |= AMDGPU_PTE_SNOOPED;
>>
>>  trace_amdgpu_vm_bo_update(mapping);
>>

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 4/4] drm/amdgpu: Use optimal mtypes and PTE bits for Arcturus

2019-08-30 Thread Liu, Shaoyun
Serials are reviewed by :  shaoyunl 

Looks like a little bit confusing that we have  two place for the pte 
flags .  get_pte_flags  already get asic specific mapping flags  and  
inside amdgpu_vm_bo_split_mapping , driver adjust the real HW mapping 
flags again .  Maybe  better just keep the logic in one place in the 
future .


Regards

shaoyun.liu



On 2019-08-26 7:07 p.m., Kuehling, Felix wrote:
> For compute VRAM allocations on Arturus use the new RW mtype
> for non-coherent local memory, CC mtype for coherent local
> memory and PTE_SNOOPED bit for invalidating non-dirty cache
> lines on remote XGMI mappings.
>
> Signed-off-by: Felix Kuehling 
> ---
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 20 +--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c|  4 
>   2 files changed, 22 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 1b7340a18f67..c5c18e292ae3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -357,6 +357,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct 
> amdgpu_sync *sync)
>   
>   static uint32_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem 
> *mem)
>   {
> + struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
>   bool coherent = mem->alloc_flags & ALLOC_MEM_FLAGS_COHERENT;
>   uint32_t mapping_flags;
>   
> @@ -366,8 +367,23 @@ static uint32_t get_pte_flags(struct amdgpu_device 
> *adev, struct kgd_mem *mem)
>   if (mem->alloc_flags & ALLOC_MEM_FLAGS_EXECUTABLE)
>   mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
>   
> - mapping_flags |= coherent ?
> - AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
> + switch (adev->asic_type) {
> + case CHIP_ARCTURUS:
> + if (mem->alloc_flags & ALLOC_MEM_FLAGS_VRAM) {
> + if (bo_adev == adev)
> + mapping_flags |= coherent ?
> + AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
> + else
> + mapping_flags |= AMDGPU_VM_MTYPE_UC;
> + } else {
> + mapping_flags |= coherent ?
> + AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
> + }
> + break;
> + default:
> + mapping_flags |= coherent ?
> + AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
> + }
>   
>   return amdgpu_gmc_get_pte_flags(adev, mapping_flags);
>   }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 7ddca3eeb6cf..189ad5699946 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1592,6 +1592,10 @@ static int amdgpu_vm_bo_split_mapping(struct 
> amdgpu_device *adev,
>   }
>   flags &= ~AMDGPU_PTE_VALID;
>   }
> + if (adev->asic_type == CHIP_ARCTURUS &&
> + !(flags & AMDGPU_PTE_SYSTEM) &&
> + mapping->bo_va->is_xgmi)
> + flags |= AMDGPU_PTE_SNOOPED;
>   
>   trace_amdgpu_vm_bo_update(mapping);
>   
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 4/4] drm/amdgpu: Use optimal mtypes and PTE bits for Arcturus

2019-08-27 Thread Koenig, Christian
Am 27.08.19 um 01:07 schrieb Kuehling, Felix:
> For compute VRAM allocations on Arturus use the new RW mtype
> for non-coherent local memory, CC mtype for coherent local
> memory and PTE_SNOOPED bit for invalidating non-dirty cache
> lines on remote XGMI mappings.
>
> Signed-off-by: Felix Kuehling 

I would give an rb on the part in amdgpu_vm_bo_split_mapping(), but 
can't fully judge the KFD part for correctness.

So only Acked-by: Christian König 

> ---
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 20 +--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c|  4 
>   2 files changed, 22 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 1b7340a18f67..c5c18e292ae3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -357,6 +357,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct 
> amdgpu_sync *sync)
>   
>   static uint32_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem 
> *mem)
>   {
> + struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
>   bool coherent = mem->alloc_flags & ALLOC_MEM_FLAGS_COHERENT;
>   uint32_t mapping_flags;
>   
> @@ -366,8 +367,23 @@ static uint32_t get_pte_flags(struct amdgpu_device 
> *adev, struct kgd_mem *mem)
>   if (mem->alloc_flags & ALLOC_MEM_FLAGS_EXECUTABLE)
>   mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
>   
> - mapping_flags |= coherent ?
> - AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
> + switch (adev->asic_type) {
> + case CHIP_ARCTURUS:
> + if (mem->alloc_flags & ALLOC_MEM_FLAGS_VRAM) {
> + if (bo_adev == adev)
> + mapping_flags |= coherent ?
> + AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
> + else
> + mapping_flags |= AMDGPU_VM_MTYPE_UC;
> + } else {
> + mapping_flags |= coherent ?
> + AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
> + }
> + break;
> + default:
> + mapping_flags |= coherent ?
> + AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
> + }
>   
>   return amdgpu_gmc_get_pte_flags(adev, mapping_flags);
>   }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 7ddca3eeb6cf..189ad5699946 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1592,6 +1592,10 @@ static int amdgpu_vm_bo_split_mapping(struct 
> amdgpu_device *adev,
>   }
>   flags &= ~AMDGPU_PTE_VALID;
>   }
> + if (adev->asic_type == CHIP_ARCTURUS &&
> + !(flags & AMDGPU_PTE_SYSTEM) &&
> + mapping->bo_va->is_xgmi)
> + flags |= AMDGPU_PTE_SNOOPED;
>   
>   trace_amdgpu_vm_bo_update(mapping);
>   

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 4/4] drm/amdgpu: Use optimal mtypes and PTE bits for Arcturus

2019-08-26 Thread Kuehling, Felix
For compute VRAM allocations on Arturus use the new RW mtype
for non-coherent local memory, CC mtype for coherent local
memory and PTE_SNOOPED bit for invalidating non-dirty cache
lines on remote XGMI mappings.

Signed-off-by: Felix Kuehling 
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 20 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c|  4 
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 1b7340a18f67..c5c18e292ae3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -357,6 +357,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct 
amdgpu_sync *sync)
 
 static uint32_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
 {
+   struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
bool coherent = mem->alloc_flags & ALLOC_MEM_FLAGS_COHERENT;
uint32_t mapping_flags;
 
@@ -366,8 +367,23 @@ static uint32_t get_pte_flags(struct amdgpu_device *adev, 
struct kgd_mem *mem)
if (mem->alloc_flags & ALLOC_MEM_FLAGS_EXECUTABLE)
mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
 
-   mapping_flags |= coherent ?
-   AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+   switch (adev->asic_type) {
+   case CHIP_ARCTURUS:
+   if (mem->alloc_flags & ALLOC_MEM_FLAGS_VRAM) {
+   if (bo_adev == adev)
+   mapping_flags |= coherent ?
+   AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
+   else
+   mapping_flags |= AMDGPU_VM_MTYPE_UC;
+   } else {
+   mapping_flags |= coherent ?
+   AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+   }
+   break;
+   default:
+   mapping_flags |= coherent ?
+   AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+   }
 
return amdgpu_gmc_get_pte_flags(adev, mapping_flags);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 7ddca3eeb6cf..189ad5699946 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1592,6 +1592,10 @@ static int amdgpu_vm_bo_split_mapping(struct 
amdgpu_device *adev,
}
flags &= ~AMDGPU_PTE_VALID;
}
+   if (adev->asic_type == CHIP_ARCTURUS &&
+   !(flags & AMDGPU_PTE_SYSTEM) &&
+   mapping->bo_va->is_xgmi)
+   flags |= AMDGPU_PTE_SNOOPED;
 
trace_amdgpu_vm_bo_update(mapping);
 
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx