For job allocation in GPU reset and page fault handling we must use GFP_ATOMIC to guarantee that we don't cycle back and depend on a dma_fence submission for the memory allocation.
Co-developed by Claude Sonnet 4. Signed-off-by: Christian König <[email protected]> --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 11 +++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 9 +++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 2 ++ 6 files changed, 22 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 44751d71b741..c425f960361c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -696,7 +696,8 @@ int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev, goto err; } - ret = amdgpu_job_alloc(adev, NULL, NULL, NULL, 1, 0, &job); + ret = amdgpu_job_alloc(adev, NULL, NULL, NULL, 1, 0, GFP_KERNEL, + &job); if (ret) goto err; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index fdf01d824d66..3ed2cb9dfa38 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -275,7 +275,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, for (i = 0; i < p->gang_size; ++i) { ret = amdgpu_job_alloc(p->adev, vm, p->entities[i], vm, num_ibs[i], p->filp->client_id, - &p->jobs[i]); + GFP_KERNEL, &p->jobs[i]); if (ret) goto free_all_kdata; switch (p->adev->enforce_isolation[fpriv->xcp_id]) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 60e4c3985029..83a802cf5837 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -416,6 +416,17 @@ void amdgpu_ib_pool_fini(struct amdgpu_device *adev) adev->ib_pool_ready = false; } +/** + * amdgpu_ib_pool_gfp_flags - Returns the gfp flags to use for each pool + * @adev: amdgpu device pointer + * @type: the IB pool type + */ +gfp_t amdgpu_ib_pool_gfp_flags(struct amdgpu_device *adev, + enum amdgpu_ib_pool_type type) +{ + return adev->ib_pools[type].gfp_flags; +} + /** * amdgpu_ib_ring_tests - test IBs on the rings * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 71c1ba735a6b..b4b424d84479 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -188,7 +188,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct drm_sched_entity *entity, void *owner, unsigned int num_ibs, u64 drm_client_id, - struct amdgpu_job **job) + gfp_t gfp_flags, struct amdgpu_job **job) { struct amdgpu_fence *af; int r; @@ -196,18 +196,18 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (num_ibs == 0) return -EINVAL; - *job = kzalloc(struct_size(*job, ibs, num_ibs), GFP_KERNEL); + *job = kzalloc(struct_size(*job, ibs, num_ibs), gfp_flags); if (!*job) return -ENOMEM; - af = kzalloc(sizeof(struct amdgpu_fence), GFP_KERNEL); + af = kzalloc(sizeof(struct amdgpu_fence), gfp_flags); if (!af) { r = -ENOMEM; goto err_job; } (*job)->hw_fence = af; - af = kzalloc(sizeof(struct amdgpu_fence), GFP_KERNEL); + af = kzalloc(sizeof(struct amdgpu_fence), gfp_flags); if (!af) { r = -ENOMEM; goto err_fence; @@ -246,6 +246,7 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, int r; r = amdgpu_job_alloc(adev, NULL, entity, owner, 1, k_job_id, + amdgpu_ib_pool_gfp_flags(adev, pool_type), job); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index 6b7cf594714c..44fe40f9e8df 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -113,7 +113,7 @@ static inline struct amdgpu_ring *amdgpu_job_ring(struct amdgpu_job *job) int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct drm_sched_entity *entity, void *owner, unsigned int num_ibs, u64 drm_client_id, - struct amdgpu_job **job); + gfp_t gfp_flags, struct amdgpu_job **job); int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, struct drm_sched_entity *entity, void *owner, size_t size, enum amdgpu_ib_pool_type pool_type, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 1a063a0a4280..4bf9734acc18 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -582,6 +582,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, struct dma_fence **f); int amdgpu_ib_pool_init(struct amdgpu_device *adev); void amdgpu_ib_pool_fini(struct amdgpu_device *adev); +gfp_t amdgpu_ib_pool_gfp_flags(struct amdgpu_device *adev, + enum amdgpu_ib_pool_type type); int amdgpu_ib_ring_tests(struct amdgpu_device *adev); bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring); void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring, -- 2.43.0
