AMD General

> -----Original Message-----
> From: Liu, Shaoyun <[email protected]>
> Sent: Saturday, May 30, 2026 12:05 AM
> To: Deucher, Alexander <[email protected]>; amd-
> [email protected]; Koenig, Christian <[email protected]>; 
> Khatri,
> Sunil <[email protected]>; Lin, Amber <[email protected]>; Zhang,
> Jesse(Jie) <[email protected]>
> Cc: Rastogi, Manu <[email protected]>; Zhang, Jesse(Jie)
> <[email protected]>
> Subject: RE: [PATCH 13/42] drm/amdgpu/gfx11: Refactor compute pipe reset and
> add HQD cleanup
>
> AMD General
>
> Comments inline .
>
> -----Original Message-----
> From: amd-gfx <[email protected]> On Behalf Of Deucher,
> Alexander
> Sent: Thursday, May 21, 2026 8:20 PM
> To: [email protected]; Koenig, Christian
> <[email protected]>; Khatri, Sunil <[email protected]>; Lin, Amber
> <[email protected]>; Zhang, Jesse(Jie) <[email protected]>; Liu,
> Shaoyun <[email protected]>
> Cc: Rastogi, Manu <[email protected]>; Deucher, Alexander
> <[email protected]>; Zhang, Jesse(Jie) <[email protected]>
> Subject: [PATCH 13/42] drm/amdgpu/gfx11: Refactor compute pipe reset and add
> HQD cleanup
>
> From: Jesse Zhang <[email protected]>
>
> Refactor gfx_v11_0_reset_compute_pipe() to accept explicit me, pipe, and queue
> parameters instead of deriving them from the ring structure. This enables the
> function to be used in generic pipe reset flows.
>
> Introduce gfx_v11_0_clear_hqds_on_mec_pipe() to properly clear
> CP_HQD_ACTIVE and CP_HQD_DEQUEUE_REQUEST for all queues on a
> given MEC pipe while the pipe reset is asserted, ensuring the HQDs are torn 
> down
> correctly before deasserting reset.
>
> Switch the KCQ reset path to use the common MEC pipe reset helper
> amdgpu_gfx_mec_pipe_reset_run(), which coordinates the reset sequence
> including KFD suspend/resume to avoid conflicts with user mode queues.
>
> v2: just update the sequence (Alex)
>
> Suggested-by:  Manu Rastogi <[email protected]>
> Suggested-by:  Alex Deucher <[email protected]>
> Signed-off-by: Jesse Zhang <[email protected]>
> Signed-off-by: Alex Deucher <[email protected]>
> ---
>  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 166 +++++++++++++++----------
>  1 file changed, 100 insertions(+), 66 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> index dd4f33d2ce45f..1995de5e69991 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> @@ -6874,11 +6874,39 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring
> *ring,
>         return amdgpu_ring_reset_helper_end(ring, timedout_fence);  }
>
> -static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring)
> +/*
> + * With MEC pipe reset asserted, clear CP_HQD_ACTIVE /
> +CP_HQD_DEQUEUE_REQUEST for
> + * every queue on (me, pipe). HQDs must be torn down while pipe reset
> +stays
> + * asserted; only then clear the pipe reset bit.
> + * Caller must hold adev->srbm_mutex.
> + */
> +static void gfx_v11_0_clear_hqds_on_mec_pipe(struct amdgpu_device *adev, u32
> me,
> +                                            u32 pipe)
>  {
> +       unsigned int q;
> +       int j;
>
> -       struct amdgpu_device *adev = ring->adev;
> -       uint32_t reset_pipe = 0, clean_pipe = 0;
> +       for (q = 0; q < adev->gfx.mec.num_queue_per_pipe; q++) {
> +               soc21_grbm_select(adev, me, pipe, q, 0);
> +               /* Start from a clean HQD dequeue state before forcing HQD 
> inactive. */
> +               WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0);
> [shaoyunl] When  we are here , you should already execute the pipe reset
> successfully , now just need to clear the ACTIVE and  DEQUEST_RESET directly .
> Don't need to check it again .
[Zhang, Jesse(Jie)] Thanks Shaoyun, 1.will remove the HQD active poll 
gfx_v11_0_clear_hqds_on_mec_pipe() - direct clear under pipe reset.
2. this applies to per-queue MMIO reset(DEQUEUE_REQUEST=0x2 + SPI reset) in 
mes_v11_0_reset_queue_mmio().

>
> +               if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) {
> +                       WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST,
> 1);
> +                       for (j = 0; j < adev->usec_timeout; j++) {
> +                               if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 
> 1))
> +                                       break;
> +                               udelay(1);
> +                       }
> +               }
> +
> +               WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0);
> +       }
> +}
> +
> +static int gfx_v11_0_reset_compute_pipe(struct amdgpu_device *adev,
> +                                          u32 me, u32 pipe, u32 queue)
> +{
> +       uint32_t reset_val, clean_val;
>         int r;
>
>         if (!gfx_v11_pipe_reset_support(adev))
> @@ -6886,109 +6914,115 @@ static int gfx_v11_0_reset_compute_pipe(struct
> amdgpu_ring *ring)
>
>         gfx_v11_0_set_safe_mode(adev, 0);
>         mutex_lock(&adev->srbm_mutex);
> -       soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
> -
> -       reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
> -       clean_pipe = reset_pipe;
> +       soc21_grbm_select(adev, me, pipe, queue, 0);
>
>         if (adev->gfx.rs64_enable) {
> +               reset_val = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
> +               clean_val = reset_val;
>
> -               switch (ring->pipe) {
> +               switch (pipe) {
>                 case 0:
> -                       reset_pipe = REG_SET_FIELD(reset_pipe, 
> CP_MEC_RS64_CNTL,
> -                                                  MEC_PIPE0_RESET, 1);
> -                       clean_pipe = REG_SET_FIELD(clean_pipe,
> CP_MEC_RS64_CNTL,
> -                                                  MEC_PIPE0_RESET, 0);
> +                       reset_val = REG_SET_FIELD(reset_val, CP_MEC_RS64_CNTL,
> +                                                 MEC_PIPE0_RESET, 1);
> +                       clean_val = REG_SET_FIELD(clean_val, CP_MEC_RS64_CNTL,
> +                                                 MEC_PIPE0_RESET, 0);
>                         break;
>                 case 1:
> -                       reset_pipe = REG_SET_FIELD(reset_pipe, 
> CP_MEC_RS64_CNTL,
> -                                                  MEC_PIPE1_RESET, 1);
> -                       clean_pipe = REG_SET_FIELD(clean_pipe,
> CP_MEC_RS64_CNTL,
> -                                                  MEC_PIPE1_RESET, 0);
> +                       reset_val = REG_SET_FIELD(reset_val, CP_MEC_RS64_CNTL,
> +                                                 MEC_PIPE1_RESET, 1);
> +                       clean_val = REG_SET_FIELD(clean_val, CP_MEC_RS64_CNTL,
> +                                                 MEC_PIPE1_RESET, 0);
>                         break;
>                 case 2:
> -                       reset_pipe = REG_SET_FIELD(reset_pipe, 
> CP_MEC_RS64_CNTL,
> -                                                  MEC_PIPE2_RESET, 1);
> -                       clean_pipe = REG_SET_FIELD(clean_pipe,
> CP_MEC_RS64_CNTL,
> -                                                  MEC_PIPE2_RESET, 0);
> +                       reset_val = REG_SET_FIELD(reset_val, CP_MEC_RS64_CNTL,
> +                                                 MEC_PIPE2_RESET, 1);
> +                       clean_val = REG_SET_FIELD(clean_val, CP_MEC_RS64_CNTL,
> +                                                 MEC_PIPE2_RESET, 0);
>                         break;
>                 case 3:
> -                       reset_pipe = REG_SET_FIELD(reset_pipe, 
> CP_MEC_RS64_CNTL,
> -                                                  MEC_PIPE3_RESET, 1);
> -                       clean_pipe = REG_SET_FIELD(clean_pipe,
> CP_MEC_RS64_CNTL,
> -                                                  MEC_PIPE3_RESET, 0);
> +                       reset_val = REG_SET_FIELD(reset_val, CP_MEC_RS64_CNTL,
> +                                                 MEC_PIPE3_RESET, 1);
> +                       clean_val = REG_SET_FIELD(clean_val, CP_MEC_RS64_CNTL,
> +                                                 MEC_PIPE3_RESET, 0);
>                         break;
>                 default:
>                         break;
>                 }
> -               WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe);
> -               WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe);
> +               WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_val);
> +               gfx_v11_0_clear_hqds_on_mec_pipe(adev, me, pipe);
> +               soc21_grbm_select(adev, me, pipe, queue, 0);
> +               WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_val);
>                 r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR) << 2) -
>                                         RS64_FW_UC_START_ADDR_LO;
>         } else {
> -               if (ring->me == 1) {
> -                       switch (ring->pipe) {
> +               reset_val = RREG32_SOC15(GC, 0, regCP_MEC_CNTL);
> +               clean_val = reset_val;
> +
> +               if (me == 1) {
> +                       switch (pipe) {
>                         case 0:
> -                               reset_pipe = REG_SET_FIELD(reset_pipe, 
> CP_MEC_CNTL,
> -                                                          
> MEC_ME1_PIPE0_RESET, 1);
> -                               clean_pipe = REG_SET_FIELD(clean_pipe, 
> CP_MEC_CNTL,
> -                                                          
> MEC_ME1_PIPE0_RESET, 0);
> +                               reset_val = REG_SET_FIELD(reset_val, 
> CP_MEC_CNTL,
> +                                                         
> MEC_ME1_PIPE0_RESET, 1);
> +                               clean_val = REG_SET_FIELD(clean_val, 
> CP_MEC_CNTL,
> +
> + MEC_ME1_PIPE0_RESET, 0);
>                                 break;
>                         case 1:
> -                               reset_pipe = REG_SET_FIELD(reset_pipe, 
> CP_MEC_CNTL,
> -                                                          
> MEC_ME1_PIPE1_RESET, 1);
> -                               clean_pipe = REG_SET_FIELD(clean_pipe, 
> CP_MEC_CNTL,
> -                                                          
> MEC_ME1_PIPE1_RESET, 0);
> +                               reset_val = REG_SET_FIELD(reset_val, 
> CP_MEC_CNTL,
> +                                                         
> MEC_ME1_PIPE1_RESET, 1);
> +                               clean_val = REG_SET_FIELD(clean_val, 
> CP_MEC_CNTL,
> +
> + MEC_ME1_PIPE1_RESET, 0);
>                                 break;
>                         case 2:
> -                               reset_pipe = REG_SET_FIELD(reset_pipe, 
> CP_MEC_CNTL,
> -                                                          
> MEC_ME1_PIPE2_RESET, 1);
> -                               clean_pipe = REG_SET_FIELD(clean_pipe, 
> CP_MEC_CNTL,
> -                                                          
> MEC_ME1_PIPE2_RESET, 0);
> +                               reset_val = REG_SET_FIELD(reset_val, 
> CP_MEC_CNTL,
> +                                                         
> MEC_ME1_PIPE2_RESET, 1);
> +                               clean_val = REG_SET_FIELD(clean_val, 
> CP_MEC_CNTL,
> +
> + MEC_ME1_PIPE2_RESET, 0);
>                                 break;
>                         case 3:
> -                               reset_pipe = REG_SET_FIELD(reset_pipe, 
> CP_MEC_CNTL,
> -                                                          
> MEC_ME1_PIPE3_RESET, 1);
> -                               clean_pipe = REG_SET_FIELD(clean_pipe, 
> CP_MEC_CNTL,
> -                                                          
> MEC_ME1_PIPE3_RESET, 0);
> +                               reset_val = REG_SET_FIELD(reset_val, 
> CP_MEC_CNTL,
> +                                                         
> MEC_ME1_PIPE3_RESET, 1);
> +                               clean_val = REG_SET_FIELD(clean_val, 
> CP_MEC_CNTL,
> +
> + MEC_ME1_PIPE3_RESET, 0);
>                                 break;
>                         default:
>                                 break;
>                         }
>                         /* mec1 fw pc: CP_MEC1_INSTR_PNTR */
>                 } else {
> -                       switch (ring->pipe) {
> +                       switch (pipe) {
>                         case 0:
> -                               reset_pipe = REG_SET_FIELD(reset_pipe, 
> CP_MEC_CNTL,
> -                                                          
> MEC_ME2_PIPE0_RESET, 1);
> -                               clean_pipe = REG_SET_FIELD(clean_pipe, 
> CP_MEC_CNTL,
> -                                                          
> MEC_ME2_PIPE0_RESET, 0);
> +                               reset_val = REG_SET_FIELD(reset_val, 
> CP_MEC_CNTL,
> +                                                         
> MEC_ME2_PIPE0_RESET, 1);
> +                               clean_val = REG_SET_FIELD(clean_val, 
> CP_MEC_CNTL,
> +
> + MEC_ME2_PIPE0_RESET, 0);
>                                 break;
>                         case 1:
> -                               reset_pipe = REG_SET_FIELD(reset_pipe, 
> CP_MEC_CNTL,
> -                                                          
> MEC_ME2_PIPE1_RESET, 1);
> -                               clean_pipe = REG_SET_FIELD(clean_pipe, 
> CP_MEC_CNTL,
> -                                                          
> MEC_ME2_PIPE1_RESET, 0);
> +                               reset_val = REG_SET_FIELD(reset_val, 
> CP_MEC_CNTL,
> +                                                         
> MEC_ME2_PIPE1_RESET, 1);
> +                               clean_val = REG_SET_FIELD(clean_val, 
> CP_MEC_CNTL,
> +
> + MEC_ME2_PIPE1_RESET, 0);
>                                 break;
>                         case 2:
> -                               reset_pipe = REG_SET_FIELD(reset_pipe, 
> CP_MEC_CNTL,
> -                                                          
> MEC_ME2_PIPE2_RESET, 1);
> -                               clean_pipe = REG_SET_FIELD(clean_pipe, 
> CP_MEC_CNTL,
> -                                                          
> MEC_ME2_PIPE2_RESET, 0);
> +                               reset_val = REG_SET_FIELD(reset_val, 
> CP_MEC_CNTL,
> +                                                         
> MEC_ME2_PIPE2_RESET, 1);
> +                               clean_val = REG_SET_FIELD(clean_val, 
> CP_MEC_CNTL,
> +
> + MEC_ME2_PIPE2_RESET, 0);
>                                 break;
>                         case 3:
> -                               reset_pipe = REG_SET_FIELD(reset_pipe, 
> CP_MEC_CNTL,
> -                                                          
> MEC_ME2_PIPE3_RESET, 1);
> -                               clean_pipe = REG_SET_FIELD(clean_pipe, 
> CP_MEC_CNTL,
> -                                                          
> MEC_ME2_PIPE3_RESET, 0);
> +                               reset_val = REG_SET_FIELD(reset_val, 
> CP_MEC_CNTL,
> +                                                         
> MEC_ME2_PIPE3_RESET, 1);
> +                               clean_val = REG_SET_FIELD(clean_val, 
> CP_MEC_CNTL,
> +
> + MEC_ME2_PIPE3_RESET, 0);
>                                 break;
>                         default:
>                                 break;
>                         }
>                         /* mec2 fw pc: CP:CP_MEC2_INSTR_PNTR */
>                 }
> -               WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe);
> -               WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe);
> +               WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_val);
> +               gfx_v11_0_clear_hqds_on_mec_pipe(adev, me, pipe);
> +               soc21_grbm_select(adev, me, pipe, queue, 0);
> +               WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_val);
>                 r = RREG32(SOC15_REG_OFFSET(GC, 0,
> regCP_MEC1_INSTR_PNTR));
>         }
>
> @@ -6996,8 +7030,8 @@ static int gfx_v11_0_reset_compute_pipe(struct
> amdgpu_ring *ring)
>         mutex_unlock(&adev->srbm_mutex);
>         gfx_v11_0_unset_safe_mode(adev, 0);
>
> -       dev_info(adev->dev, "The ring %s pipe resets to MEC FW start PC: 
> %s\n",
> ring->name,
> -                       r == 0 ? "successfully" : "failed");
> +       dev_dbg(adev->dev, "MEC pipe me%u pipe%u queue%u resets to MEC FW
> start PC: %s\n",
> +               me, pipe, queue, r == 0 ? "successfully" : "failed");
>         /*FIXME:Sometimes driver can't cache the MEC firmware start PC 
> correctly,
> so the pipe
>          * reset status relies on the compute ring test result.
>          */
> @@ -7017,7 +7051,7 @@ static int gfx_v11_0_reset_kcq(struct amdgpu_ring
> *ring,
>         r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, use_mmio, 
> 0);
>         if (r) {
>                 dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe 
> reset\n", r);
> -               r = gfx_v11_0_reset_compute_pipe(ring);
> +               r = gfx_v11_0_reset_compute_pipe(adev, ring->me,
> + ring->pipe,
> +ring->queue);
>                 if (r)
>                         return r;
>         }
> --
> 2.54.0
>

Reply via email to