AMD General

Comments inline .

-----Original Message-----
From: amd-gfx <[email protected]> On Behalf Of Deucher, 
Alexander
Sent: Thursday, May 21, 2026 8:20 PM
To: [email protected]; Koenig, Christian 
<[email protected]>; Khatri, Sunil <[email protected]>; Lin, Amber 
<[email protected]>; Zhang, Jesse(Jie) <[email protected]>; Liu, Shaoyun 
<[email protected]>
Cc: Rastogi, Manu <[email protected]>; Deucher, Alexander 
<[email protected]>; Zhang, Jesse(Jie) <[email protected]>
Subject: [PATCH 13/42] drm/amdgpu/gfx11: Refactor compute pipe reset and add 
HQD cleanup

From: Jesse Zhang <[email protected]>

Refactor gfx_v11_0_reset_compute_pipe() to accept explicit me, pipe, and queue 
parameters instead of deriving them from the ring structure. This enables the 
function to be used in generic pipe reset flows.

Introduce gfx_v11_0_clear_hqds_on_mec_pipe() to properly clear CP_HQD_ACTIVE 
and CP_HQD_DEQUEUE_REQUEST for all queues on a given MEC pipe while the pipe 
reset is asserted, ensuring the HQDs are torn down correctly before deasserting 
reset.

Switch the KCQ reset path to use the common MEC pipe reset helper 
amdgpu_gfx_mec_pipe_reset_run(), which coordinates the reset sequence including 
KFD suspend/resume to avoid conflicts with user mode queues.

v2: just update the sequence (Alex)

Suggested-by:  Manu Rastogi <[email protected]>
Suggested-by:  Alex Deucher <[email protected]>
Signed-off-by: Jesse Zhang <[email protected]>
Signed-off-by: Alex Deucher <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 166 +++++++++++++++----------
 1 file changed, 100 insertions(+), 66 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index dd4f33d2ce45f..1995de5e69991 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -6874,11 +6874,39 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring,
        return amdgpu_ring_reset_helper_end(ring, timedout_fence);  }

-static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring)
+/*
+ * With MEC pipe reset asserted, clear CP_HQD_ACTIVE /
+CP_HQD_DEQUEUE_REQUEST for
+ * every queue on (me, pipe). HQDs must be torn down while pipe reset
+stays
+ * asserted; only then clear the pipe reset bit.
+ * Caller must hold adev->srbm_mutex.
+ */
+static void gfx_v11_0_clear_hqds_on_mec_pipe(struct amdgpu_device *adev, u32 
me,
+                                            u32 pipe)
 {
+       unsigned int q;
+       int j;

-       struct amdgpu_device *adev = ring->adev;
-       uint32_t reset_pipe = 0, clean_pipe = 0;
+       for (q = 0; q < adev->gfx.mec.num_queue_per_pipe; q++) {
+               soc21_grbm_select(adev, me, pipe, q, 0);
+               /* Start from a clean HQD dequeue state before forcing HQD 
inactive. */
+               WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0);
[shaoyunl] When  we are here , you should already execute the pipe reset 
successfully , now just need to clear the ACTIVE and  DEQUEST_RESET directly . 
Don't need to check it again .

+               if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) {
+                       WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1);
+                       for (j = 0; j < adev->usec_timeout; j++) {
+                               if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 
1))
+                                       break;
+                               udelay(1);
+                       }
+               }
+
+               WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0);
+       }
+}
+
+static int gfx_v11_0_reset_compute_pipe(struct amdgpu_device *adev,
+                                          u32 me, u32 pipe, u32 queue)
+{
+       uint32_t reset_val, clean_val;
        int r;

        if (!gfx_v11_pipe_reset_support(adev))
@@ -6886,109 +6914,115 @@ static int gfx_v11_0_reset_compute_pipe(struct 
amdgpu_ring *ring)

        gfx_v11_0_set_safe_mode(adev, 0);
        mutex_lock(&adev->srbm_mutex);
-       soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
-
-       reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
-       clean_pipe = reset_pipe;
+       soc21_grbm_select(adev, me, pipe, queue, 0);

        if (adev->gfx.rs64_enable) {
+               reset_val = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
+               clean_val = reset_val;

-               switch (ring->pipe) {
+               switch (pipe) {
                case 0:
-                       reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
-                                                  MEC_PIPE0_RESET, 1);
-                       clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
-                                                  MEC_PIPE0_RESET, 0);
+                       reset_val = REG_SET_FIELD(reset_val, CP_MEC_RS64_CNTL,
+                                                 MEC_PIPE0_RESET, 1);
+                       clean_val = REG_SET_FIELD(clean_val, CP_MEC_RS64_CNTL,
+                                                 MEC_PIPE0_RESET, 0);
                        break;
                case 1:
-                       reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
-                                                  MEC_PIPE1_RESET, 1);
-                       clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
-                                                  MEC_PIPE1_RESET, 0);
+                       reset_val = REG_SET_FIELD(reset_val, CP_MEC_RS64_CNTL,
+                                                 MEC_PIPE1_RESET, 1);
+                       clean_val = REG_SET_FIELD(clean_val, CP_MEC_RS64_CNTL,
+                                                 MEC_PIPE1_RESET, 0);
                        break;
                case 2:
-                       reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
-                                                  MEC_PIPE2_RESET, 1);
-                       clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
-                                                  MEC_PIPE2_RESET, 0);
+                       reset_val = REG_SET_FIELD(reset_val, CP_MEC_RS64_CNTL,
+                                                 MEC_PIPE2_RESET, 1);
+                       clean_val = REG_SET_FIELD(clean_val, CP_MEC_RS64_CNTL,
+                                                 MEC_PIPE2_RESET, 0);
                        break;
                case 3:
-                       reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
-                                                  MEC_PIPE3_RESET, 1);
-                       clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
-                                                  MEC_PIPE3_RESET, 0);
+                       reset_val = REG_SET_FIELD(reset_val, CP_MEC_RS64_CNTL,
+                                                 MEC_PIPE3_RESET, 1);
+                       clean_val = REG_SET_FIELD(clean_val, CP_MEC_RS64_CNTL,
+                                                 MEC_PIPE3_RESET, 0);
                        break;
                default:
                        break;
                }
-               WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe);
-               WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe);
+               WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_val);
+               gfx_v11_0_clear_hqds_on_mec_pipe(adev, me, pipe);
+               soc21_grbm_select(adev, me, pipe, queue, 0);
+               WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_val);
                r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR) << 2) -
                                        RS64_FW_UC_START_ADDR_LO;
        } else {
-               if (ring->me == 1) {
-                       switch (ring->pipe) {
+               reset_val = RREG32_SOC15(GC, 0, regCP_MEC_CNTL);
+               clean_val = reset_val;
+
+               if (me == 1) {
+                       switch (pipe) {
                        case 0:
-                               reset_pipe = REG_SET_FIELD(reset_pipe, 
CP_MEC_CNTL,
-                                                          MEC_ME1_PIPE0_RESET, 
1);
-                               clean_pipe = REG_SET_FIELD(clean_pipe, 
CP_MEC_CNTL,
-                                                          MEC_ME1_PIPE0_RESET, 
0);
+                               reset_val = REG_SET_FIELD(reset_val, 
CP_MEC_CNTL,
+                                                         MEC_ME1_PIPE0_RESET, 
1);
+                               clean_val = REG_SET_FIELD(clean_val, 
CP_MEC_CNTL,
+                                                         MEC_ME1_PIPE0_RESET, 
0);
                                break;
                        case 1:
-                               reset_pipe = REG_SET_FIELD(reset_pipe, 
CP_MEC_CNTL,
-                                                          MEC_ME1_PIPE1_RESET, 
1);
-                               clean_pipe = REG_SET_FIELD(clean_pipe, 
CP_MEC_CNTL,
-                                                          MEC_ME1_PIPE1_RESET, 
0);
+                               reset_val = REG_SET_FIELD(reset_val, 
CP_MEC_CNTL,
+                                                         MEC_ME1_PIPE1_RESET, 
1);
+                               clean_val = REG_SET_FIELD(clean_val, 
CP_MEC_CNTL,
+                                                         MEC_ME1_PIPE1_RESET, 
0);
                                break;
                        case 2:
-                               reset_pipe = REG_SET_FIELD(reset_pipe, 
CP_MEC_CNTL,
-                                                          MEC_ME1_PIPE2_RESET, 
1);
-                               clean_pipe = REG_SET_FIELD(clean_pipe, 
CP_MEC_CNTL,
-                                                          MEC_ME1_PIPE2_RESET, 
0);
+                               reset_val = REG_SET_FIELD(reset_val, 
CP_MEC_CNTL,
+                                                         MEC_ME1_PIPE2_RESET, 
1);
+                               clean_val = REG_SET_FIELD(clean_val, 
CP_MEC_CNTL,
+                                                         MEC_ME1_PIPE2_RESET, 
0);
                                break;
                        case 3:
-                               reset_pipe = REG_SET_FIELD(reset_pipe, 
CP_MEC_CNTL,
-                                                          MEC_ME1_PIPE3_RESET, 
1);
-                               clean_pipe = REG_SET_FIELD(clean_pipe, 
CP_MEC_CNTL,
-                                                          MEC_ME1_PIPE3_RESET, 
0);
+                               reset_val = REG_SET_FIELD(reset_val, 
CP_MEC_CNTL,
+                                                         MEC_ME1_PIPE3_RESET, 
1);
+                               clean_val = REG_SET_FIELD(clean_val, 
CP_MEC_CNTL,
+                                                         MEC_ME1_PIPE3_RESET, 
0);
                                break;
                        default:
                                break;
                        }
                        /* mec1 fw pc: CP_MEC1_INSTR_PNTR */
                } else {
-                       switch (ring->pipe) {
+                       switch (pipe) {
                        case 0:
-                               reset_pipe = REG_SET_FIELD(reset_pipe, 
CP_MEC_CNTL,
-                                                          MEC_ME2_PIPE0_RESET, 
1);
-                               clean_pipe = REG_SET_FIELD(clean_pipe, 
CP_MEC_CNTL,
-                                                          MEC_ME2_PIPE0_RESET, 
0);
+                               reset_val = REG_SET_FIELD(reset_val, 
CP_MEC_CNTL,
+                                                         MEC_ME2_PIPE0_RESET, 
1);
+                               clean_val = REG_SET_FIELD(clean_val, 
CP_MEC_CNTL,
+                                                         MEC_ME2_PIPE0_RESET, 
0);
                                break;
                        case 1:
-                               reset_pipe = REG_SET_FIELD(reset_pipe, 
CP_MEC_CNTL,
-                                                          MEC_ME2_PIPE1_RESET, 
1);
-                               clean_pipe = REG_SET_FIELD(clean_pipe, 
CP_MEC_CNTL,
-                                                          MEC_ME2_PIPE1_RESET, 
0);
+                               reset_val = REG_SET_FIELD(reset_val, 
CP_MEC_CNTL,
+                                                         MEC_ME2_PIPE1_RESET, 
1);
+                               clean_val = REG_SET_FIELD(clean_val, 
CP_MEC_CNTL,
+                                                         MEC_ME2_PIPE1_RESET, 
0);
                                break;
                        case 2:
-                               reset_pipe = REG_SET_FIELD(reset_pipe, 
CP_MEC_CNTL,
-                                                          MEC_ME2_PIPE2_RESET, 
1);
-                               clean_pipe = REG_SET_FIELD(clean_pipe, 
CP_MEC_CNTL,
-                                                          MEC_ME2_PIPE2_RESET, 
0);
+                               reset_val = REG_SET_FIELD(reset_val, 
CP_MEC_CNTL,
+                                                         MEC_ME2_PIPE2_RESET, 
1);
+                               clean_val = REG_SET_FIELD(clean_val, 
CP_MEC_CNTL,
+                                                         MEC_ME2_PIPE2_RESET, 
0);
                                break;
                        case 3:
-                               reset_pipe = REG_SET_FIELD(reset_pipe, 
CP_MEC_CNTL,
-                                                          MEC_ME2_PIPE3_RESET, 
1);
-                               clean_pipe = REG_SET_FIELD(clean_pipe, 
CP_MEC_CNTL,
-                                                          MEC_ME2_PIPE3_RESET, 
0);
+                               reset_val = REG_SET_FIELD(reset_val, 
CP_MEC_CNTL,
+                                                         MEC_ME2_PIPE3_RESET, 
1);
+                               clean_val = REG_SET_FIELD(clean_val, 
CP_MEC_CNTL,
+                                                         MEC_ME2_PIPE3_RESET, 
0);
                                break;
                        default:
                                break;
                        }
                        /* mec2 fw pc: CP:CP_MEC2_INSTR_PNTR */
                }
-               WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe);
-               WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe);
+               WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_val);
+               gfx_v11_0_clear_hqds_on_mec_pipe(adev, me, pipe);
+               soc21_grbm_select(adev, me, pipe, queue, 0);
+               WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_val);
                r = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_MEC1_INSTR_PNTR));
        }

@@ -6996,8 +7030,8 @@ static int gfx_v11_0_reset_compute_pipe(struct 
amdgpu_ring *ring)
        mutex_unlock(&adev->srbm_mutex);
        gfx_v11_0_unset_safe_mode(adev, 0);

-       dev_info(adev->dev, "The ring %s pipe resets to MEC FW start PC: %s\n", 
ring->name,
-                       r == 0 ? "successfully" : "failed");
+       dev_dbg(adev->dev, "MEC pipe me%u pipe%u queue%u resets to MEC FW start 
PC: %s\n",
+               me, pipe, queue, r == 0 ? "successfully" : "failed");
        /*FIXME:Sometimes driver can't cache the MEC firmware start PC 
correctly, so the pipe
         * reset status relies on the compute ring test result.
         */
@@ -7017,7 +7051,7 @@ static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring,
        r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, use_mmio, 0);
        if (r) {
                dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe 
reset\n", r);
-               r = gfx_v11_0_reset_compute_pipe(ring);
+               r = gfx_v11_0_reset_compute_pipe(adev, ring->me, ring->pipe,
+ring->queue);
                if (r)
                        return r;
        }
--
2.54.0

Reply via email to