On 2026. május 29., péntek 13:24:07 közép-európai nyári idő Christian König 
wrote:
> For job allocation in GPU reset and page fault handling we must use
> GFP_ATOMIC to guarantee that we don't cycle back and depend on a
> dma_fence submission for the memory allocation.

Can you please edit the commit message to elaborate  a little more?
For example, please add something like this:

Add gfp_flags argument to amdgpu_job_alloc() and expose the
gfp_flags of IB pools with amdgpu_ib_pool_gfp_flags() so that
we can use different flags when allocating jobs.

(Feel free to edit the text to improve it to your tastes.)

> 
> Co-developed by Claude Sonnet 4.

This tag seems inconsistent with the kernel guidelines and also inconsistent 
with commits from other AMD engineers. Can you please change this to use the
Assisted-by tag? Also in the other commits in this series.

Link to the guidelines:
https://docs.kernel.org/process/coding-assistants.html

Otherwise, the code looks fine.

Reviewed-by: Timur Kristóf <[email protected]>

> 
> Signed-off-by: Christian König <[email protected]>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c |  3 ++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c     |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c     | 11 +++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c    |  9 +++++----
>  drivers/gpu/drm/amd/amdgpu/amdgpu_job.h    |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h   |  2 ++
>  6 files changed, 22 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index
> 44751d71b741..c425f960361c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> @@ -696,7 +696,8 @@ int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,
>               goto err;
>       }
> 
> -     ret = amdgpu_job_alloc(adev, NULL, NULL, NULL, 1, 0, &job);
> +     ret = amdgpu_job_alloc(adev, NULL, NULL, NULL, 1, 0, GFP_KERNEL,
> +                            &job);
>       if (ret)
>               goto err;
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index fdf01d824d66..3ed2cb9dfa38
> 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -275,7 +275,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
>       for (i = 0; i < p->gang_size; ++i) {
>               ret = amdgpu_job_alloc(p->adev, vm, p->entities[i], vm,
>                                      num_ibs[i], p->filp-
>client_id,
> -                                    &p->jobs[i]);
> +                                    GFP_KERNEL, &p->jobs[i]);
>               if (ret)
>                       goto free_all_kdata;
>               switch (p->adev->enforce_isolation[fpriv->xcp_id]) {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 60e4c3985029..83a802cf5837
> 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -416,6 +416,17 @@ void amdgpu_ib_pool_fini(struct amdgpu_device *adev)
>       adev->ib_pool_ready = false;
>  }
> 
> +/**
> + * amdgpu_ib_pool_gfp_flags - Returns the gfp flags to use for each pool
> + * @adev: amdgpu device pointer
> + * @type: the IB pool type
> + */
> +gfp_t amdgpu_ib_pool_gfp_flags(struct amdgpu_device *adev,
> +                            enum amdgpu_ib_pool_type type)
> +{
> +     return adev->ib_pools[type].gfp_flags;
> +}
> +
>  /**
>   * amdgpu_ib_ring_tests - test IBs on the rings
>   *
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 71c1ba735a6b..b4b424d84479
> 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -188,7 +188,7 @@ static enum drm_gpu_sched_stat
> amdgpu_job_timedout(struct drm_sched_job *s_job) int
> amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct
> drm_sched_entity *entity, void *owner,
>                    unsigned int num_ibs, u64 drm_client_id,
> -                  struct amdgpu_job **job)
> +                  gfp_t gfp_flags, struct amdgpu_job **job)
>  {
>       struct amdgpu_fence *af;
>       int r;
> @@ -196,18 +196,18 @@ int amdgpu_job_alloc(struct amdgpu_device *adev,
> struct amdgpu_vm *vm, if (num_ibs == 0)
>               return -EINVAL;
> 
> -     *job = kzalloc(struct_size(*job, ibs, num_ibs), GFP_KERNEL);
> +     *job = kzalloc(struct_size(*job, ibs, num_ibs), gfp_flags);
>       if (!*job)
>               return -ENOMEM;
> 
> -     af = kzalloc(sizeof(struct amdgpu_fence), GFP_KERNEL);
> +     af = kzalloc(sizeof(struct amdgpu_fence), gfp_flags);
>       if (!af) {
>               r = -ENOMEM;
>               goto err_job;
>       }
>       (*job)->hw_fence = af;
> 
> -     af = kzalloc(sizeof(struct amdgpu_fence), GFP_KERNEL);
> +     af = kzalloc(sizeof(struct amdgpu_fence), gfp_flags);
>       if (!af) {
>               r = -ENOMEM;
>               goto err_fence;
> @@ -246,6 +246,7 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev,
> int r;
> 
>       r = amdgpu_job_alloc(adev, NULL, entity, owner, 1, k_job_id,
> +                          amdgpu_ib_pool_gfp_flags(adev, 
pool_type),
>                            job);
>       if (r)
>               return r;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index 6b7cf594714c..44fe40f9e8df
> 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
> @@ -113,7 +113,7 @@ static inline struct amdgpu_ring *amdgpu_job_ring(struct
> amdgpu_job *job) int amdgpu_job_alloc(struct amdgpu_device *adev, struct
> amdgpu_vm *vm, struct drm_sched_entity *entity, void *owner,
>                    unsigned int num_ibs, u64 drm_client_id,
> -                  struct amdgpu_job **job);
> +                  gfp_t gfp_flags, struct amdgpu_job **job);
>  int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev,
>                            struct drm_sched_entity *entity, void 
*owner,
>                            size_t size, enum amdgpu_ib_pool_type 
pool_type,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 1a063a0a4280..4bf9734acc18
> 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -582,6 +582,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring,
> unsigned num_ibs, struct dma_fence **f);
>  int amdgpu_ib_pool_init(struct amdgpu_device *adev);
>  void amdgpu_ib_pool_fini(struct amdgpu_device *adev);
> +gfp_t amdgpu_ib_pool_gfp_flags(struct amdgpu_device *adev,
> +                            enum amdgpu_ib_pool_type type);
>  int amdgpu_ib_ring_tests(struct amdgpu_device *adev);
>  bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring);
>  void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring,




Reply via email to