On 2026. május 29., péntek 13:24:05 közép-európai nyári idő Christian König 
wrote:
> Make sure that we use the emmergency reserves for unrecoverable page
> faults and GPU resets.
> 
> Signed-off-by: Christian König <[email protected]>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 23 ++++++++++++++++++++---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c | 18 ++++++++++++------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_sa.h |  3 ++-
>  3 files changed, 34 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 334f95f8f339..60e4c3985029
> 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -356,16 +356,33 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev)
>               [AMDGPU_IB_POOL_IMMEDIATE] = SZ_128K,
>               [AMDGPU_IB_POOL_DIRECT] = SZ_512K
>       };
> +     const gfp_t gfp_flags[AMDGPU_IB_POOL_MAX] = {
> +             /*
> +              * For normal page table updates and recoverable page 
faults,
> +              * further restricted by the VM eviction lock to not 
wait for
> +              * memory reclaim.
> +              */
> +             [AMDGPU_IB_POOL_DELAYED] = GFP_KERNEL,
> +             /*
> +              * For redirecting unrecoverable page faults to the 
dummy page
> +              * or set the PRT bits. dma_fence submissions might 
depend on
> +              * that so we need the emmergency resewrves.
> +              */
> +             [AMDGPU_IB_POOL_IMMEDIATE] = GFP_ATOMIC,

I know that "retry faults" and "recoverable faults" are different, but both of 
these faults can be mitigated, so using the term "unrecoverable" here feels 
somewhat confusing to me.

Can you please clarify, both in the comment above and in the commit message,
that this refers to retry faults?

> +             /*
> +              * For IB tests during GPU resets. Only very small and 
temporary
> +              * allocation to make allow dma_fences to signal.
> +              */
> +             [AMDGPU_IB_POOL_DIRECT] = GFP_ATOMIC

Just a small grammatical nitpick: the word "make" is superfluous in this
comment; it should read "to allow dma_fences to signal".

> +     };
>       int r, i;
> 
>       if (adev->ib_pool_ready)
>               return 0;
> 
> -

Looks like this hunk just removes the spurious blank line left over from the
previous commit.

With that fixed and the comments + commit message clarified:
Reviewed-by: Timur Kristóf <[email protected]>

>       for (i = 0; i < AMDGPU_IB_POOL_MAX; i++) {
>               r = amdgpu_sa_bo_manager_init(adev, &adev->ib_pools[i],
> -                                           sizes[i], 256,
> -                                           
AMDGPU_GEM_DOMAIN_GTT);
> +                                           sizes[i], 
gfp_flags[i]);
>               if (r)
>                       goto error;
>       }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c index 39070b2a4c04..74124f80601e
> 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
> @@ -46,11 +46,13 @@
> 
>  int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev,
>                             struct amdgpu_sa_manager *sa_manager,
> -                           unsigned int size, u32 suballoc_align, 
u32 domain)
> +                           unsigned int size, gfp_t gfp_flags)
>  {
>       int r;
> 
> -     r = amdgpu_bo_create_kernel(adev, size, AMDGPU_GPU_PAGE_SIZE, 
domain,
> +     sa_manager->gfp_flags = gfp_flags;
> +     r = amdgpu_bo_create_kernel(adev, size, AMDGPU_GPU_PAGE_SIZE,
> +                                 AMDGPU_GEM_DOMAIN_GTT,
>                                   &sa_manager->bo, &sa_manager-
>gpu_addr,
>                                   &sa_manager->cpu_ptr);
>       if (r) {
> @@ -59,7 +61,8 @@ int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev,
>       }
> 
>       memset(sa_manager->cpu_ptr, 0, size);
> -     drm_suballoc_manager_init(&sa_manager->base, size, 
suballoc_align);
> +     drm_suballoc_manager_init(&sa_manager->base, size, 256);
> +
>       return r;
>  }
> 
> @@ -73,7 +76,8 @@ void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev,
> 
>       drm_suballoc_manager_fini(&sa_manager->base);
> 
> -     amdgpu_bo_free_kernel(&sa_manager->bo, &sa_manager->gpu_addr,
> &sa_manager->cpu_ptr); +      amdgpu_bo_free_kernel(&sa_manager->bo,
> &sa_manager->gpu_addr,
> +                           &sa_manager->cpu_ptr);
>  }
> 
>  int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
> @@ -81,7 +85,8 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
> unsigned int size)
>  {
>       struct drm_suballoc *sa = drm_suballoc_new(&sa_manager->base, 
size,
> -                                                
GFP_KERNEL, false, 0);
> +                                                
sa_manager->gfp_flags,
> +                                                false, 
0);
> 
>       if (IS_ERR(sa)) {
>               *sa_bo = NULL;
> @@ -110,6 +115,7 @@ void amdgpu_sa_bo_dump_debug_info(struct
> amdgpu_sa_manager *sa_manager, {
>       struct drm_printer p = drm_seq_file_printer(m);
> 
> -     drm_suballoc_dump_debug_info(&sa_manager->base, &p, sa_manager-
>gpu_addr);
> +     drm_suballoc_dump_debug_info(&sa_manager->base, &p,
> +                                  sa_manager->gpu_addr);
>  }
>  #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.h index 8c85c80fc119..1d1c89348709
> 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.h
> @@ -35,6 +35,7 @@ struct amdgpu_sa_manager {
>       struct amdgpu_bo                *bo;
>       uint64_t                        gpu_addr;
>       void                            *cpu_ptr;
> +     gfp_t                           gfp_flags;
>  };
> 
>  static inline struct amdgpu_sa_manager *
> @@ -57,7 +58,7 @@ static inline void *amdgpu_sa_bo_cpu_addr(struct
> drm_suballoc *sa_bo)
> 
>  int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev,
>                             struct amdgpu_sa_manager *sa_manager,
> -                           unsigned size, u32 align, u32 domain);
> +                           unsigned size, gfp_t gfp_flags);
>  void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev,
>                              struct amdgpu_sa_manager 
*sa_manager);
>  int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev,




Reply via email to