Re: [PATCH 10/10] drm/amdgpu: stop removing BOs from the LRU v3

2019-05-29 Thread Pelloux-prayer, Pierre-eric
Hi Christian,

The series is:

Tested-by: Pierre-Eric Pelloux-Prayer 


Pierre-Eric



On 29/05/2019 14:27, Christian König wrote:
> This avoids OOM situations when we have lots of threads
> submitting at the same time.
> 
> v3: apply this to the whole driver, not just CS
> 
> Signed-off-by: Christian König 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c| 2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c| 4 ++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 2 +-
>  4 files changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 20f2955d2a55..3e2da24cd17a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -648,7 +648,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser 
> *p,
>   }
>  
>   r = ttm_eu_reserve_buffers(>ticket, >validated, true,
> -, true);
> +, false);
>   if (unlikely(r != 0)) {
>   if (r != -ERESTARTSYS)
>   DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> index 06f83cac0d3a..f660628e6af9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> @@ -79,7 +79,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, 
> struct amdgpu_vm *vm,
>   list_add(_tv.head, );
>   amdgpu_vm_get_pd_bo(vm, , );
>  
> - r = ttm_eu_reserve_buffers(, , true, NULL, true);
> + r = ttm_eu_reserve_buffers(, , true, NULL, false);
>   if (r) {
>   DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r);
>   return r;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index d513a5ad03dd..ed25a4e14404 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -171,7 +171,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
>  
>   amdgpu_vm_get_pd_bo(vm, , _pd);
>  
> - r = ttm_eu_reserve_buffers(, , false, , true);
> + r = ttm_eu_reserve_buffers(, , false, , false);
>   if (r) {
>   dev_err(adev->dev, "leaking bo va because "
>   "we fail to reserve bo (%d)\n", r);
> @@ -608,7 +608,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void 
> *data,
>  
>   amdgpu_vm_get_pd_bo(>vm, , _pd);
>  
> - r = ttm_eu_reserve_buffers(, , true, , true);
> + r = ttm_eu_reserve_buffers(, , true, , false);
>   if (r)
>   goto error_unref;
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> index c430e8259038..d60593cc436e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> @@ -155,7 +155,7 @@ static inline int amdgpu_bo_reserve(struct amdgpu_bo *bo, 
> bool no_intr)
>   struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>   int r;
>  
> - r = ttm_bo_reserve(>tbo, !no_intr, false, NULL);
> + r = __ttm_bo_reserve(>tbo, !no_intr, false, NULL);
>   if (unlikely(r != 0)) {
>   if (r != -ERESTARTSYS)
>   dev_err(adev->dev, "%p reserve failed\n", bo);
> 
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

RE: [PATCH 10/10] drm/amdgpu: stop removing BOs from the LRU v3

2019-05-29 Thread Zhou, David(ChunMing)
Patch #1,#5,#6,#8,#9,#10 are Reviewed-by: Chunming Zhou 
Patch #2,#3,#4 are Acked-by: Chunming Zhou 

-David

> -Original Message-
> From: dri-devel  On Behalf Of
> Christian K?nig
> Sent: Wednesday, May 29, 2019 8:27 PM
> To: dri-de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org
> Subject: [PATCH 10/10] drm/amdgpu: stop removing BOs from the LRU v3
> 
> This avoids OOM situations when we have lots of threads submitting at the
> same time.
> 
> v3: apply this to the whole driver, not just CS
> 
> Signed-off-by: Christian König 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c| 2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c| 4 ++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 2 +-
>  4 files changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 20f2955d2a55..3e2da24cd17a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -648,7 +648,7 @@ static int amdgpu_cs_parser_bos(struct
> amdgpu_cs_parser *p,
>   }
> 
>   r = ttm_eu_reserve_buffers(>ticket, >validated, true,
> -, true);
> +, false);
>   if (unlikely(r != 0)) {
>   if (r != -ERESTARTSYS)
>   DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> index 06f83cac0d3a..f660628e6af9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> @@ -79,7 +79,7 @@ int amdgpu_map_static_csa(struct amdgpu_device
> *adev, struct amdgpu_vm *vm,
>   list_add(_tv.head, );
>   amdgpu_vm_get_pd_bo(vm, , );
> 
> - r = ttm_eu_reserve_buffers(, , true, NULL, true);
> + r = ttm_eu_reserve_buffers(, , true, NULL, false);
>   if (r) {
>   DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r);
>   return r;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index d513a5ad03dd..ed25a4e14404 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -171,7 +171,7 @@ void amdgpu_gem_object_close(struct
> drm_gem_object *obj,
> 
>   amdgpu_vm_get_pd_bo(vm, , _pd);
> 
> - r = ttm_eu_reserve_buffers(, , false, , true);
> + r = ttm_eu_reserve_buffers(, , false, , false);
>   if (r) {
>   dev_err(adev->dev, "leaking bo va because "
>   "we fail to reserve bo (%d)\n", r);
> @@ -608,7 +608,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
> void *data,
> 
>   amdgpu_vm_get_pd_bo(>vm, , _pd);
> 
> - r = ttm_eu_reserve_buffers(, , true, , true);
> + r = ttm_eu_reserve_buffers(, , true, , false);
>   if (r)
>   goto error_unref;
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> index c430e8259038..d60593cc436e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> @@ -155,7 +155,7 @@ static inline int amdgpu_bo_reserve(struct
> amdgpu_bo *bo, bool no_intr)
>   struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>   int r;
> 
> - r = ttm_bo_reserve(>tbo, !no_intr, false, NULL);
> + r = __ttm_bo_reserve(>tbo, !no_intr, false, NULL);
>   if (unlikely(r != 0)) {
>   if (r != -ERESTARTSYS)
>   dev_err(adev->dev, "%p reserve failed\n", bo);
> --
> 2.17.1
> 
> ___
> dri-devel mailing list
> dri-de...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 10/10] drm/amdgpu: stop removing BOs from the LRU v3

2019-05-27 Thread Koenig, Christian
Am 24.05.19 um 23:34 schrieb Kuehling, Felix:
> On 2019-05-23 5:06 a.m., Christian König wrote:
>> [CAUTION: External Email]
>>
>> Leaving BOs on the LRU is harmless. We always did this for VM page table
>> and per VM BOs.
>>
>> The key point is that BOs which couldn't be reserved can't be evicted.
>> So what happened is that an application used basically all of VRAM
>> during CS and because of this X server couldn't pin a BO for scanout.
>>
>> Now we keep the BOs on the LRU and modify TTM to block for the CS to
>> complete, which in turn allows the X server to pin its BO for scanout.
>
> OK, let me rephrase that to make sure I understand it correctly. I think
> the point is that eviction candidates come from an LRU list, so leaving
> things on the LRU makes more BOs available for eviction and avoids OOM
> situations. To take advantage of that, patch 6 adds the ability to wait
> for reserved BOs when there is nothing easier to evict.
>
> ROCm applications like to use lots of memory. So it probably makes sense
> for us to stop removing our BOs from the LRU as well while we
> mass-validate our BOs in amdgpu_amdkfd_gpuvm_restore_process_bos.

Well that would allow concurrent calls of 
amdgpu_amdkfd_gpuvm_restore_process_bos() to wait for each other.

If that's what you want then yeah that certainly makes sense.

Regards,
Christian.

>
> Regards,
>     Felix
>
>
>> Christian.
>>
>> Am 22.05.19 um 21:43 schrieb Kuehling, Felix:
>>> Can you explain how this avoids OOM situations? When is it safe to leave
>>> a reserved BO on the LRU list? Could we do the same thing in
>>> amdgpu_amdkfd_gpuvm.c? And if we did, what would be the expected side
>>> effects or consequences?
>>>
>>> Thanks,
>>>      Felix
>>>
>>> On 2019-05-22 8:59 a.m., Christian König wrote:
 [CAUTION: External Email]

 This avoids OOM situations when we have lots of threads
 submitting at the same time.

 v3: apply this to the whole driver, not just CS

 Signed-off-by: Christian König 
 ---
     drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +-
     drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c    | 2 +-
     drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c    | 4 ++--
     drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 2 +-
     4 files changed, 5 insertions(+), 5 deletions(-)

 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
 b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
 index 20f2955d2a55..3e2da24cd17a 100644
 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
 @@ -648,7 +648,7 @@ static int amdgpu_cs_parser_bos(struct
 amdgpu_cs_parser *p,
    }

    r = ttm_eu_reserve_buffers(>ticket, >validated, true,
 -  , true);
 +  , false);
    if (unlikely(r != 0)) {
    if (r != -ERESTARTSYS)
    DRM_ERROR("ttm_eu_reserve_buffers
 failed.\n");
 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
 b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
 index 06f83cac0d3a..f660628e6af9 100644
 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
 @@ -79,7 +79,7 @@ int amdgpu_map_static_csa(struct amdgpu_device
 *adev, struct amdgpu_vm *vm,
    list_add(_tv.head, );
    amdgpu_vm_get_pd_bo(vm, , );

 -   r = ttm_eu_reserve_buffers(, , true, NULL, true);
 +   r = ttm_eu_reserve_buffers(, , true, NULL, false);
    if (r) {
    DRM_ERROR("failed to reserve CSA,PD BOs:
 err=%d\n", r);
    return r;
 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
 b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
 index d513a5ad03dd..ed25a4e14404 100644
 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
 @@ -171,7 +171,7 @@ void amdgpu_gem_object_close(struct
 drm_gem_object *obj,

    amdgpu_vm_get_pd_bo(vm, , _pd);

 -   r = ttm_eu_reserve_buffers(, , false,
 , true);
 +   r = ttm_eu_reserve_buffers(, , false,
 , false);
    if (r) {
    dev_err(adev->dev, "leaking bo va because "
    "we fail to reserve bo (%d)\n", r);
 @@ -608,7 +608,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
 void *data,

    amdgpu_vm_get_pd_bo(>vm, , _pd);

 -   r = ttm_eu_reserve_buffers(, , true,
 , true);
 +   r = ttm_eu_reserve_buffers(, , true,
 , false);
    if (r)
    goto error_unref;

 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
 b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
 index c430e8259038..d60593cc436e 100644
 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
 +++ 

Re: [PATCH 10/10] drm/amdgpu: stop removing BOs from the LRU v3

2019-05-24 Thread Kuehling, Felix
On 2019-05-23 5:06 a.m., Christian König wrote:
> [CAUTION: External Email]
>
> Leaving BOs on the LRU is harmless. We always did this for VM page table
> and per VM BOs.
>
> The key point is that BOs which couldn't be reserved can't be evicted.
> So what happened is that an application used basically all of VRAM
> during CS and because of this X server couldn't pin a BO for scanout.
>
> Now we keep the BOs on the LRU and modify TTM to block for the CS to
> complete, which in turn allows the X server to pin its BO for scanout.


OK, let me rephrase that to make sure I understand it correctly. I think 
the point is that eviction candidates come from an LRU list, so leaving 
things on the LRU makes more BOs available for eviction and avoids OOM 
situations. To take advantage of that, patch 6 adds the ability to wait 
for reserved BOs when there is nothing easier to evict.

ROCm applications like to use lots of memory. So it probably makes sense 
for us to stop removing our BOs from the LRU as well while we 
mass-validate our BOs in amdgpu_amdkfd_gpuvm_restore_process_bos.

Regards,
   Felix


>
> Christian.
>
> Am 22.05.19 um 21:43 schrieb Kuehling, Felix:
>> Can you explain how this avoids OOM situations? When is it safe to leave
>> a reserved BO on the LRU list? Could we do the same thing in
>> amdgpu_amdkfd_gpuvm.c? And if we did, what would be the expected side
>> effects or consequences?
>>
>> Thanks,
>>     Felix
>>
>> On 2019-05-22 8:59 a.m., Christian König wrote:
>>> [CAUTION: External Email]
>>>
>>> This avoids OOM situations when we have lots of threads
>>> submitting at the same time.
>>>
>>> v3: apply this to the whole driver, not just CS
>>>
>>> Signed-off-by: Christian König 
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +-
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c    | 2 +-
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c    | 4 ++--
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 2 +-
>>>    4 files changed, 5 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> index 20f2955d2a55..3e2da24cd17a 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> @@ -648,7 +648,7 @@ static int amdgpu_cs_parser_bos(struct 
>>> amdgpu_cs_parser *p,
>>>   }
>>>
>>>   r = ttm_eu_reserve_buffers(>ticket, >validated, true,
>>> -  , true);
>>> +  , false);
>>>   if (unlikely(r != 0)) {
>>>   if (r != -ERESTARTSYS)
>>>   DRM_ERROR("ttm_eu_reserve_buffers 
>>> failed.\n");
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>> index 06f83cac0d3a..f660628e6af9 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>> @@ -79,7 +79,7 @@ int amdgpu_map_static_csa(struct amdgpu_device 
>>> *adev, struct amdgpu_vm *vm,
>>>   list_add(_tv.head, );
>>>   amdgpu_vm_get_pd_bo(vm, , );
>>>
>>> -   r = ttm_eu_reserve_buffers(, , true, NULL, true);
>>> +   r = ttm_eu_reserve_buffers(, , true, NULL, false);
>>>   if (r) {
>>>   DRM_ERROR("failed to reserve CSA,PD BOs: 
>>> err=%d\n", r);
>>>   return r;
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> index d513a5ad03dd..ed25a4e14404 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> @@ -171,7 +171,7 @@ void amdgpu_gem_object_close(struct 
>>> drm_gem_object *obj,
>>>
>>>   amdgpu_vm_get_pd_bo(vm, , _pd);
>>>
>>> -   r = ttm_eu_reserve_buffers(, , false, 
>>> , true);
>>> +   r = ttm_eu_reserve_buffers(, , false, 
>>> , false);
>>>   if (r) {
>>>   dev_err(adev->dev, "leaking bo va because "
>>>   "we fail to reserve bo (%d)\n", r);
>>> @@ -608,7 +608,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, 
>>> void *data,
>>>
>>>   amdgpu_vm_get_pd_bo(>vm, , _pd);
>>>
>>> -   r = ttm_eu_reserve_buffers(, , true, 
>>> , true);
>>> +   r = ttm_eu_reserve_buffers(, , true, 
>>> , false);
>>>   if (r)
>>>   goto error_unref;
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>> index c430e8259038..d60593cc436e 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>> @@ -155,7 +155,7 @@ static inline int amdgpu_bo_reserve(struct 
>>> amdgpu_bo *bo, bool no_intr)
>>>   struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>>>   int r;
>>>
>>> -   r = ttm_bo_reserve(>tbo, !no_intr, false, NULL);
>>> +   r = __ttm_bo_reserve(>tbo, !no_intr, false, NULL);
>>>    

Re: [PATCH 10/10] drm/amdgpu: stop removing BOs from the LRU v3

2019-05-23 Thread Christian König
Leaving BOs on the LRU is harmless. We always did this for VM page table 
and per VM BOs.


The key point is that BOs which couldn't be reserved can't be evicted. 
So what happened is that an application used basically all of VRAM 
during CS and because of this X server couldn't pin a BO for scanout.


Now we keep the BOs on the LRU and modify TTM to block for the CS to 
complete, which in turn allows the X server to pin its BO for scanout.


Christian.

Am 22.05.19 um 21:43 schrieb Kuehling, Felix:

Can you explain how this avoids OOM situations? When is it safe to leave
a reserved BO on the LRU list? Could we do the same thing in
amdgpu_amdkfd_gpuvm.c? And if we did, what would be the expected side
effects or consequences?

Thanks,
    Felix

On 2019-05-22 8:59 a.m., Christian König wrote:

[CAUTION: External Email]

This avoids OOM situations when we have lots of threads
submitting at the same time.

v3: apply this to the whole driver, not just CS

Signed-off-by: Christian König 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +-
   drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c| 2 +-
   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c| 4 ++--
   drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 2 +-
   4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 20f2955d2a55..3e2da24cd17a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -648,7 +648,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
  }

  r = ttm_eu_reserve_buffers(>ticket, >validated, true,
-  , true);
+  , false);
  if (unlikely(r != 0)) {
  if (r != -ERESTARTSYS)
  DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
index 06f83cac0d3a..f660628e6af9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
@@ -79,7 +79,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
  list_add(_tv.head, );
  amdgpu_vm_get_pd_bo(vm, , );

-   r = ttm_eu_reserve_buffers(, , true, NULL, true);
+   r = ttm_eu_reserve_buffers(, , true, NULL, false);
  if (r) {
  DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r);
  return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index d513a5ad03dd..ed25a4e14404 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -171,7 +171,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,

  amdgpu_vm_get_pd_bo(vm, , _pd);

-   r = ttm_eu_reserve_buffers(, , false, , true);
+   r = ttm_eu_reserve_buffers(, , false, , false);
  if (r) {
  dev_err(adev->dev, "leaking bo va because "
  "we fail to reserve bo (%d)\n", r);
@@ -608,7 +608,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,

  amdgpu_vm_get_pd_bo(>vm, , _pd);

-   r = ttm_eu_reserve_buffers(, , true, , true);
+   r = ttm_eu_reserve_buffers(, , true, , false);
  if (r)
  goto error_unref;

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index c430e8259038..d60593cc436e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -155,7 +155,7 @@ static inline int amdgpu_bo_reserve(struct amdgpu_bo *bo, 
bool no_intr)
  struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
  int r;

-   r = ttm_bo_reserve(>tbo, !no_intr, false, NULL);
+   r = __ttm_bo_reserve(>tbo, !no_intr, false, NULL);
  if (unlikely(r != 0)) {
  if (r != -ERESTARTSYS)
  dev_err(adev->dev, "%p reserve failed\n", bo);
--
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

RE: [PATCH 10/10] drm/amdgpu: stop removing BOs from the LRU v3

2019-05-23 Thread Liang, Prike
Hi, Christian 

Thanks for you patch .Those patches can fix amdgpu bo pinned failed issue 
during perform dm_plane_helper_prepare_fb 
and Abaqus performance seems improved.

But there some error message can be observer. Do we need drop  
amdgpu_vm_validate_pt_bos() error message 
and other warning debug info .

[ 1910.674541] Call Trace:
[ 1910.676944]  [] dump_stack+0x19/0x1b
[ 1910.682236]  [] __warn+0xd8/0x100
[ 1910.687195]  [] warn_slowpath_null+0x1d/0x20
[ 1910.693167]  [] amdgpu_bo_move+0x169/0x1c0 [amdgpu]
[ 1910.699719]  [] ttm_bo_handle_move_mem+0x26b/0x5d0 [amdttm]
[ 1910.706976]  [] ttm_bo_evict+0x147/0x3b0 [amdttm]
[ 1910.713358]  [] ? drm_mm_insert_node_in_range+0x299/0x4d0 
[drm]
[ 1910.720881]  [] ? 
_kcl_reservation_object_reserve_shared+0xfe/0x1a0 [amdkcl]
[ 1910.729710]  [] ttm_mem_evict_first+0x29e/0x3a0 [amdttm]
[ 1910.736705]  [] amdttm_bo_mem_space+0x1ae/0x300 [amdttm]
[ 1910.743696]  [] amdttm_bo_validate+0xc4/0x140 [amdttm]
[ 1910.750529]  [] amdgpu_cs_bo_validate+0xa5/0x220 [amdgpu]
[ 1910.757625]  [] amdgpu_cs_validate+0x47/0x2e0 [amdgpu]
[ 1910.764463]  [] ? amdgpu_cs_bo_validate+0x220/0x220 
[amdgpu]
[ 1910.771736]  [] amdgpu_vm_validate_pt_bos+0x92/0x140 
[amdgpu]
[ 1910.779248]  [] amdgpu_cs_ioctl+0x18a7/0x1d50 [amdgpu]
[ 1910.785992]  [] ? amdgpu_cs_find_mapping+0x120/0x120 
[amdgpu]
[ 1910.793486]  [] drm_ioctl_kernel+0x6c/0xb0 [drm]
[ 1910.799777]  [] drm_ioctl+0x1e7/0x420 [drm]
[ 1910.805643]  [] ? amdgpu_cs_find_mapping+0x120/0x120 
[amdgpu]
[ 1910.813090]  [] amdgpu_drm_ioctl+0x4b/0x80 [amdgpu]
[ 1910.819639]  [] do_vfs_ioctl+0x3a0/0x5a0
[ 1910.825217]  [] ? __schedule+0x13a/0x890
[ 1910.830795]  [] SyS_ioctl+0xa1/0xc0
[ 1910.835943]  [] system_call_fastpath+0x22/0x27
[ 1910.842048] ---[ end trace a5c00b151c061d53 ]---
[ 1910.846814] [TTM] Buffer eviction failed
[ 1910.850838] [drm:amdgpu_cs_ioctl [amdgpu]] *ERROR* 
amdgpu_vm_validate_pt_bos() failed.
[ 1910.858905] [drm:amdgpu_cs_ioctl [amdgpu]] *ERROR* Failed to process the 
buffer list -22!
...

Thanks,
Prike
-Original Message-
From: Christian König  
Sent: Wednesday, May 22, 2019 9:00 PM
To: Olsak, Marek ; Zhou, David(ChunMing) 
; Liang, Prike ; 
dri-de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org
Subject: [PATCH 10/10] drm/amdgpu: stop removing BOs from the LRU v3

[CAUTION: External Email]

This avoids OOM situations when we have lots of threads submitting at the same 
time.

v3: apply this to the whole driver, not just CS

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c| 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c| 4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 20f2955d2a55..3e2da24cd17a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -648,7 +648,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
}

r = ttm_eu_reserve_buffers(>ticket, >validated, true,
-  , true);
+  , false);
if (unlikely(r != 0)) {
if (r != -ERESTARTSYS)
DRM_ERROR("ttm_eu_reserve_buffers failed.\n"); diff 
--git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
index 06f83cac0d3a..f660628e6af9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
@@ -79,7 +79,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
list_add(_tv.head, );
amdgpu_vm_get_pd_bo(vm, , );

-   r = ttm_eu_reserve_buffers(, , true, NULL, true);
+   r = ttm_eu_reserve_buffers(, , true, NULL, false);
if (r) {
DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r);
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index d513a5ad03dd..ed25a4e14404 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -171,7 +171,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,

amdgpu_vm_get_pd_bo(vm, , _pd);

-   r = ttm_eu_reserve_buffers(, , false, , true);
+   r = ttm_eu_reserve_buffers(, , false, , 
+ false);
if (r) {
dev_err(adev->dev, "leaking bo va because "
"we fail to reserve bo (%d)\n", r); @@ -608,7 +608,7 @@ 
int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,

amdgpu_vm_get_pd_bo(>vm, , _pd);

-   r = ttm_eu_reserve_buffers(, , true, , true);
+   r = ttm_eu_reserve_buffers(, , true, , 
+ false);
if (r)
goto error_unref;

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h

Re: [PATCH 10/10] drm/amdgpu: stop removing BOs from the LRU v3

2019-05-22 Thread Kuehling, Felix
Can you explain how this avoids OOM situations? When is it safe to leave 
a reserved BO on the LRU list? Could we do the same thing in 
amdgpu_amdkfd_gpuvm.c? And if we did, what would be the expected side 
effects or consequences?

Thanks,
   Felix

On 2019-05-22 8:59 a.m., Christian König wrote:
> [CAUTION: External Email]
>
> This avoids OOM situations when we have lots of threads
> submitting at the same time.
>
> v3: apply this to the whole driver, not just CS
>
> Signed-off-by: Christian König 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c| 2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c| 4 ++--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 2 +-
>   4 files changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 20f2955d2a55..3e2da24cd17a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -648,7 +648,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser 
> *p,
>  }
>
>  r = ttm_eu_reserve_buffers(>ticket, >validated, true,
> -  , true);
> +  , false);
>  if (unlikely(r != 0)) {
>  if (r != -ERESTARTSYS)
>  DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> index 06f83cac0d3a..f660628e6af9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> @@ -79,7 +79,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, 
> struct amdgpu_vm *vm,
>  list_add(_tv.head, );
>  amdgpu_vm_get_pd_bo(vm, , );
>
> -   r = ttm_eu_reserve_buffers(, , true, NULL, true);
> +   r = ttm_eu_reserve_buffers(, , true, NULL, false);
>  if (r) {
>  DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r);
>  return r;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index d513a5ad03dd..ed25a4e14404 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -171,7 +171,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
>
>  amdgpu_vm_get_pd_bo(vm, , _pd);
>
> -   r = ttm_eu_reserve_buffers(, , false, , true);
> +   r = ttm_eu_reserve_buffers(, , false, , false);
>  if (r) {
>  dev_err(adev->dev, "leaking bo va because "
>  "we fail to reserve bo (%d)\n", r);
> @@ -608,7 +608,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void 
> *data,
>
>  amdgpu_vm_get_pd_bo(>vm, , _pd);
>
> -   r = ttm_eu_reserve_buffers(, , true, , true);
> +   r = ttm_eu_reserve_buffers(, , true, , false);
>  if (r)
>  goto error_unref;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> index c430e8259038..d60593cc436e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> @@ -155,7 +155,7 @@ static inline int amdgpu_bo_reserve(struct amdgpu_bo *bo, 
> bool no_intr)
>  struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>  int r;
>
> -   r = ttm_bo_reserve(>tbo, !no_intr, false, NULL);
> +   r = __ttm_bo_reserve(>tbo, !no_intr, false, NULL);
>  if (unlikely(r != 0)) {
>  if (r != -ERESTARTSYS)
>  dev_err(adev->dev, "%p reserve failed\n", bo);
> --
> 2.17.1
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx