[AMD Official Use Only - AMD Internal Distribution Only] Reviewed-by: Jesse Zhang <[email protected]>
> -----Original Message----- > From: amd-gfx <[email protected]> On Behalf Of Alex > Deucher > Sent: Wednesday, January 21, 2026 11:01 AM > To: [email protected] > Cc: Deucher, Alexander <[email protected]> > Subject: [PATCH 02/10] drm/amdgpu/job: use GFP_ATOMIC while in gpu reset > > If we need to allocate a job during GPU reset, use GFP_ATOMIC rather than > GFP_KERNEL. > > v2: use pool type to determine alloc flags. > > Signed-off-by: Alex Deucher <[email protected]> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 2 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 2 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 13 ++++++++----- > drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | 2 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 2 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c | 5 +++-- > 7 files changed, 16 insertions(+), 12 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c > index 877d0df50376a..89df26dd5ada7 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c > @@ -672,7 +672,7 @@ int amdgpu_amdkfd_submit_ib(struct amdgpu_device > *adev, > goto err; > } > > - ret = amdgpu_job_alloc(adev, NULL, NULL, NULL, 1, &job, 0); > + ret = amdgpu_job_alloc(adev, NULL, NULL, NULL, 1, &job, 0, false); > if (ret) > goto err; > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > index d591dce0f3b3c..4d53d9cb8490d 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > @@ -282,7 +282,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, > for (i = 0; i < p->gang_size; ++i) { > ret = amdgpu_job_alloc(p->adev, vm, p->entities[i], vm, > num_ibs[i], &p->jobs[i], > - p->filp->client_id); > + p->filp->client_id, false); > if (ret) > goto free_all_kdata; > switch (p->adev->enforce_isolation[fpriv->xcp_id]) { diff --git > 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > index 72ec455fa932c..d90966daf52fc 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > @@ -69,7 +69,7 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct > amdgpu_vm *vm, > > if (size) { > r = amdgpu_sa_bo_new(&adev->ib_pools[pool_type], > - &ib->sa_bo, size); > + &ib->sa_bo, size, pool_type == > AMDGPU_IB_POOL_DIRECT); > if (r) { > dev_err(adev->dev, "failed to get a new IB (%d)\n", r); > return r; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > index ec8d74db62758..8660e3d1c3088 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > @@ -190,7 +190,7 @@ static enum drm_gpu_sched_stat > amdgpu_job_timedout(struct drm_sched_job *s_job) int amdgpu_job_alloc(struct > amdgpu_device *adev, struct amdgpu_vm *vm, > struct drm_sched_entity *entity, void *owner, > unsigned int num_ibs, struct amdgpu_job **job, > - u64 drm_client_id) > + u64 drm_client_id, bool need_atomic) > { > struct amdgpu_fence *af; > int r; > @@ -198,18 +198,21 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, > struct amdgpu_vm *vm, > if (num_ibs == 0) > return -EINVAL; > > - *job = kzalloc(struct_size(*job, ibs, num_ibs), GFP_KERNEL); > + *job = kzalloc(struct_size(*job, ibs, num_ibs), > + need_atomic ? GFP_ATOMIC : GFP_KERNEL); > if (!*job) > return -ENOMEM; > > - af = kzalloc(sizeof(struct amdgpu_fence), GFP_KERNEL); > + af = kzalloc(sizeof(struct amdgpu_fence), > + need_atomic ? GFP_ATOMIC : GFP_KERNEL); > if (!af) { > r = -ENOMEM; > goto err_job; > } > (*job)->hw_fence = af; > > - af = kzalloc(sizeof(struct amdgpu_fence), GFP_KERNEL); > + af = kzalloc(sizeof(struct amdgpu_fence), > + need_atomic ? 
GFP_ATOMIC : GFP_KERNEL); > if (!af) { > r = -ENOMEM; > goto err_fence; > @@ -248,7 +251,7 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device > *adev, > int r; > > r = amdgpu_job_alloc(adev, NULL, entity, owner, 1, job, > - k_job_id); > + k_job_id, pool_type == AMDGPU_IB_POOL_DIRECT); > if (r) > return r; > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h > index 56a88e14a0448..9de2cae966fea 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h > @@ -113,7 +113,7 @@ static inline struct amdgpu_ring *amdgpu_job_ring(struct > amdgpu_job *job) int amdgpu_job_alloc(struct amdgpu_device *adev, struct > amdgpu_vm *vm, > struct drm_sched_entity *entity, void *owner, > unsigned int num_ibs, struct amdgpu_job **job, > - u64 drm_client_id); > + u64 drm_client_id, bool need_atomic); > int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, > struct drm_sched_entity *entity, void *owner, > size_t size, enum amdgpu_ib_pool_type pool_type, > diff -- > git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h > index 912c9afaf9e11..8abff5fdae81d 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h > @@ -341,7 +341,7 @@ int amdgpu_sa_bo_manager_start(struct amdgpu_device > *adev, > struct amdgpu_sa_manager *sa_manager); > int > amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager, > struct drm_suballoc **sa_bo, > - unsigned int size); > + unsigned int size, bool need_atomic); > void amdgpu_sa_bo_free(struct drm_suballoc **sa_bo, > struct dma_fence *fence); > #if defined(CONFIG_DEBUG_FS) > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c > index 39070b2a4c04f..1d44b95de7e55 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c > @@ -78,10 +78,11 @@ void amdgpu_sa_bo_manager_fini(struct 
amdgpu_device > *adev, > > int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager, > struct drm_suballoc **sa_bo, > - unsigned int size) > + unsigned int size, bool need_atomic) > { > struct drm_suballoc *sa = drm_suballoc_new(&sa_manager->base, size, > - GFP_KERNEL, false, 0); > + need_atomic ? GFP_ATOMIC : > GFP_KERNEL, > + false, 0); > > if (IS_ERR(sa)) { > *sa_bo = NULL; > -- > 2.52.0
