On 27.06.25 05:39, Alex Deucher wrote:
> Move the force completion handling into the common
> engine reset function. No need to duplicate it for
> every IP version.
>
> Signed-off-by: Alex Deucher <alexander.deuc...@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c |  5 ++++-
>  drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 17 +----------------
>  drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c   |  6 ++----
>  drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c   |  6 ++----
>  4 files changed, 9 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
> index 7e26a44dcc1fd..56939bb1d1a95 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
> @@ -590,9 +590,12 @@ int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id)
>  	 * to be submitted to the queues after the reset is complete.
>  	 */
>  	if (!ret) {
> +		amdgpu_fence_driver_force_completion(gfx_ring);
>  		drm_sched_wqueue_start(&gfx_ring->sched);
> -		if (adev->sdma.has_page_queue)
> +		if (adev->sdma.has_page_queue) {
> +			amdgpu_fence_driver_force_completion(page_ring);
Calling amdgpu_fence_driver_force_completion() here sounds like a really
bad idea in the first place.

That will mark all fences as completed, making it impossible to execute
the remaining work (see the sketch of the helper at the end of this mail).

Regards,
Christian.

>  			drm_sched_wqueue_start(&page_ring->sched);
> +		}
>  	}
>  	mutex_unlock(&sdma_instance->engine_reset_mutex);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> index d3072bca43e3f..572d105420ec3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> @@ -1714,7 +1714,7 @@ static int sdma_v4_4_2_stop_queue(struct amdgpu_ring *ring)
>  static int sdma_v4_4_2_restore_queue(struct amdgpu_ring *ring)
>  {
>  	struct amdgpu_device *adev = ring->adev;
> -	u32 inst_mask, tmp_mask;
> +	u32 inst_mask;
>  	int i, r;
>
>  	inst_mask = 1 << ring->me;
> @@ -1733,21 +1733,6 @@ static int sdma_v4_4_2_restore_queue(struct amdgpu_ring *ring)
>  	}
>
>  	r = sdma_v4_4_2_inst_start(adev, inst_mask, true);
> -	if (r)
> -		return r;
> -
> -	tmp_mask = inst_mask;
> -	for_each_inst(i, tmp_mask) {
> -		ring = &adev->sdma.instance[i].ring;
> -
> -		amdgpu_fence_driver_force_completion(ring);
> -
> -		if (adev->sdma.has_page_queue) {
> -			struct amdgpu_ring *page = &adev->sdma.instance[i].page;
> -
> -			amdgpu_fence_driver_force_completion(page);
> -		}
> -	}
>
>  	return r;
>  }
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> index 4d72b085b3dd7..ed1706da7deec 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> @@ -1618,10 +1618,8 @@ static int sdma_v5_0_restore_queue(struct amdgpu_ring *ring)
>
>  	r = sdma_v5_0_gfx_resume_instance(adev, inst_id, true);
>  	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
> -	if (r)
> -		return r;
> -	amdgpu_fence_driver_force_completion(ring);
> -	return 0;
> +
> +	return r;
>  }
>
>  static int sdma_v5_0_ring_preempt_ib(struct amdgpu_ring *ring)
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> index 42a25150f83ac..b87a4b44fa939 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> @@ -1534,10 +1534,8 @@ static int sdma_v5_2_restore_queue(struct amdgpu_ring *ring)
>  	r = sdma_v5_2_gfx_resume_instance(adev, inst_id, true);
>
>  	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
> -	if (r)
> -		return r;
> -	amdgpu_fence_driver_force_completion(ring);
> -	return 0;
> +
> +	return r;
>  }
>
>  static int sdma_v5_2_ring_preempt_ib(struct amdgpu_ring *ring)
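[For context on the objection above: amdgpu_fence_driver_force_completion()
is essentially the following, lightly paraphrased from
drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c; the comments are added here for
illustration and are not in the kernel source.

void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring)
{
	/*
	 * Report the last *emitted* sequence number (sync_seq) as the
	 * last *completed* one, so every fence ever emitted on this ring
	 * is considered signalled, including fences for jobs the engine
	 * never actually executed before the reset.
	 */
	amdgpu_fence_write(ring, ring->fence_drv.sync_seq);

	/* Walk the fence list and signal everything up to that point. */
	amdgpu_fence_process(ring);
}

Once all fences have been signalled like this, restarting the scheduler
with drm_sched_wqueue_start() finds no pending work left to resubmit,
which is the problem described above.]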