AMD General

Regards,
      Prike

> -----Original Message-----
> From: amd-gfx <[email protected]> On Behalf Of Jesse
> Zhang
> Sent: Monday, June 1, 2026 1:49 PM
> To: [email protected]
> Cc: Deucher, Alexander <[email protected]>; Koenig, Christian
> <[email protected]>; Zhang, Jesse(Jie) <[email protected]>;
> Rastogi, Manu <[email protected]>; Deucher, Alexander
> <[email protected]>; Zhang, Jesse(Jie) <[email protected]>
> Subject: [PATCH v2 14/42] drm/amdgpu/gfx12: Refactor compute pipe reset and add HQD cleanup
>
> Refactor gfx_v12_0_reset_compute_pipe() to accept explicit me, pipe, and queue
> parameters instead of deriving them from the ring structure. This enables the
> function to be used in generic pipe reset flows.
>
> Introduce gfx_v12_0_clear_hqds_on_mec_pipe() to properly clear
> CP_HQD_ACTIVE and CP_HQD_DEQUEUE_REQUEST for all queues on a given
> MEC pipe while the pipe reset is asserted, ensuring the HQDs are torn down
> correctly before deasserting reset.
>
> Switch the KCQ reset path to use the common MEC pipe reset helper
> amdgpu_gfx_mec_pipe_reset_run(), which coordinates the reset sequence,
> including KFD suspend/resume, to avoid conflicts with user mode queues.
>
> v2: just update the sequence (Alex)
> v3: directly clear ACTIVE and DEQUEUE_REQUEST (Shaoyun Liu)
>
> Suggested-by:  Manu Rastogi <[email protected]>
> Suggested-by:  Alex Deucher <[email protected]>
> Signed-off-by: Jesse Zhang <[email protected]>
> Signed-off-by: Alex Deucher <[email protected]>
> ---
>  drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 115 +++++++++++++++----------
>  1 file changed, 69 insertions(+), 46 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> index 4be650ce1fba..f7783129ecc8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> @@ -5309,10 +5309,30 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring
> *ring,
>       return amdgpu_ring_reset_helper_end(ring, timedout_fence);  }
>
> -static int gfx_v12_0_reset_compute_pipe(struct amdgpu_ring *ring)
> +/*
> + * With MEC pipe reset asserted, clear CP_HQD_ACTIVE /
> +CP_HQD_DEQUEUE_REQUEST for
> + * every queue on (me, pipe). HQDs must be torn down while pipe reset
> +stays
> + * asserted; only then clear the pipe reset bit.
> + * Caller must hold adev->srbm_mutex.
> + */
> +static void gfx_v12_0_clear_hqds_on_mec_pipe(struct amdgpu_device *adev, u32
> me,
> +                                          u32 pipe)
>  {
> -     struct amdgpu_device *adev = ring->adev;
> -     uint32_t reset_pipe = 0, clean_pipe = 0;
> +     unsigned int q;
> +     int j;
> +
> +     for (q = 0; q < adev->gfx.mec.num_queue_per_pipe; q++) {
> +             soc24_grbm_select(adev, me, pipe, q, 0);
> +             /* Start from a clean HQD dequeue state before forcing HQD 
> inactive.
> */
> +             WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0);
> +             WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0);
> +     }
> +}
> +
> +static int gfx_v12_0_reset_compute_pipe(struct amdgpu_device *adev,
> +                                        u32 me, u32 pipe, u32 queue)
> +{
> +     uint32_t reset_val, clean_val;
>       int r = 0;
>
>       if (!gfx_v12_pipe_reset_support(adev))
> @@ -5320,75 +5340,78 @@ static int gfx_v12_0_reset_compute_pipe(struct
> amdgpu_ring *ring)
>
>       gfx_v12_0_set_safe_mode(adev, 0);
>       mutex_lock(&adev->srbm_mutex);
> -     soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
> -
> -     reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
> -     clean_pipe = reset_pipe;
> -
> +     soc24_grbm_select(adev, me, pipe, queue, 0);
>       if (adev->gfx.rs64_enable) {
> -             switch (ring->pipe) {
> +             reset_val = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
> +             clean_val = reset_val;
> +
> +             switch (pipe) {
>               case 0:
> -                     reset_pipe = REG_SET_FIELD(reset_pipe,
> CP_MEC_RS64_CNTL,
> -                                                MEC_PIPE0_RESET, 1);
> -                     clean_pipe = REG_SET_FIELD(clean_pipe,
> CP_MEC_RS64_CNTL,
> -                                                MEC_PIPE0_RESET, 0);
> +                     reset_val = REG_SET_FIELD(reset_val,
> CP_MEC_RS64_CNTL,
> +                                               MEC_PIPE0_RESET, 1);
> +                     clean_val = REG_SET_FIELD(clean_val,
> CP_MEC_RS64_CNTL,
> +                                               MEC_PIPE0_RESET, 0);
>                       break;
>               case 1:
> -                     reset_pipe = REG_SET_FIELD(reset_pipe,
> CP_MEC_RS64_CNTL,
> -                                                MEC_PIPE1_RESET, 1);
> -                     clean_pipe = REG_SET_FIELD(clean_pipe,
> CP_MEC_RS64_CNTL,
> -                                                MEC_PIPE1_RESET, 0);
> +                     reset_val = REG_SET_FIELD(reset_val,
> CP_MEC_RS64_CNTL,
> +                                               MEC_PIPE1_RESET, 1);
> +                     clean_val = REG_SET_FIELD(clean_val,
> CP_MEC_RS64_CNTL,
> +                                               MEC_PIPE1_RESET, 0);
>                       break;
>               case 2:
> -                     reset_pipe = REG_SET_FIELD(reset_pipe,
> CP_MEC_RS64_CNTL,
> -                                                MEC_PIPE2_RESET, 1);
> -                     clean_pipe = REG_SET_FIELD(clean_pipe,
> CP_MEC_RS64_CNTL,
> -                                                MEC_PIPE2_RESET, 0);
> +                     reset_val = REG_SET_FIELD(reset_val,
> CP_MEC_RS64_CNTL,
> +                                               MEC_PIPE2_RESET, 1);
> +                     clean_val = REG_SET_FIELD(clean_val,
> CP_MEC_RS64_CNTL,
> +                                               MEC_PIPE2_RESET, 0);
>                       break;
>               case 3:
> -                     reset_pipe = REG_SET_FIELD(reset_pipe,
> CP_MEC_RS64_CNTL,
> -                                                MEC_PIPE3_RESET, 1);
> -                     clean_pipe = REG_SET_FIELD(clean_pipe,
> CP_MEC_RS64_CNTL,
> -                                                MEC_PIPE3_RESET, 0);
> +                     reset_val = REG_SET_FIELD(reset_val,
> CP_MEC_RS64_CNTL,
> +                                               MEC_PIPE3_RESET, 1);
> +                     clean_val = REG_SET_FIELD(clean_val,
> CP_MEC_RS64_CNTL,
> +                                               MEC_PIPE3_RESET, 0);
>                       break;
>               default:
>                       break;
>               }
> -             WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe);
> -             WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe);
> +             WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_val);
> +             gfx_v12_0_clear_hqds_on_mec_pipe(adev, me, pipe);

Same comment as on patch #13: the queue selection needs to be removed.
Other than that, the patch is
Reviewed-by: Prike Liang <[email protected]>

> +             soc24_grbm_select(adev, me, pipe, queue, 0);
> +             WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_val);
>               r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR) <<
> 2) -
>                               RS64_FW_UC_START_ADDR_LO;
>       } else {
> -             switch (ring->pipe) {
> +             reset_val = RREG32_SOC15(GC, 0, regCP_MEC_CNTL);
> +             clean_val = reset_val;
> +
> +             switch (pipe) {
>               case 0:
> -                     reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
> -                                                        MEC_ME1_PIPE0_RESET,
> 1);
> -                     clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
> -                                                        MEC_ME1_PIPE0_RESET,
> 0);
> +                     reset_val = REG_SET_FIELD(reset_val, CP_MEC_CNTL,
> +                                               MEC_ME1_PIPE0_RESET, 1);
> +                     clean_val = REG_SET_FIELD(clean_val, CP_MEC_CNTL,
> +                                               MEC_ME1_PIPE0_RESET, 0);
>                       break;
>               case 1:
> -                     reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
> -                                                        MEC_ME1_PIPE1_RESET,
> 1);
> -                     clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
> -                                                        MEC_ME1_PIPE1_RESET,
> 0);
> +                     reset_val = REG_SET_FIELD(reset_val, CP_MEC_CNTL,
> +                                               MEC_ME1_PIPE1_RESET, 1);
> +                     clean_val = REG_SET_FIELD(clean_val, CP_MEC_CNTL,
> +                                               MEC_ME1_PIPE1_RESET, 0);
>                       break;
>               default:
> -             break;
> +                     break;
>               }
> -             WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe);
> -             WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe);
> -             /* Doesn't find the F32 MEC instruction pointer register, and 
> suppose
> -              * the driver won't run into the F32 mode.
> -              */
> +
> +             WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_val);
> +             gfx_v12_0_clear_hqds_on_mec_pipe(adev, me, pipe);
> +             soc24_grbm_select(adev, me, pipe, queue, 0);
> +             WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_val);
>       }
>
>       soc24_grbm_select(adev, 0, 0, 0, 0);
>       mutex_unlock(&adev->srbm_mutex);
>       gfx_v12_0_unset_safe_mode(adev, 0);
>
> -     dev_info(adev->dev, "The ring %s pipe resets: %s\n", ring->name,
> -                     r == 0 ? "successfully" : "failed");
> -     /* Need the ring test to verify the pipe reset result.*/
> +     dev_dbg(adev->dev, "MEC pipe me%u pipe%u queue%u resets to MEC FW
> start PC: %s\n",
> +             me, pipe, queue, r == 0 ? "successfully" : "failed");
>       return 0;
>  }
>
> @@ -5405,7 +5428,7 @@ static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring,
>       r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, use_mmio, 0);
>       if (r) {
>               dev_warn(adev->dev, "fail(%d) to reset kcq  and try pipe 
> reset\n", r);
> -             r = gfx_v12_0_reset_compute_pipe(ring);
> +             r = gfx_v12_0_reset_compute_pipe(adev, ring->me, ring->pipe,
> +ring->queue);
>               if (r)
>                       return r;
>       }
> --
> 2.49.0

Reply via email to