Hold the MEC pipe reset asserted, walk every queue on that (me, pipe) and
clear CP_HQD_ACTIVE / CP_HQD_DEQUEUE_REQUEST via
gfx_v11_0_clear_hqds_on_mec_pipe(), then deassert the reset. This avoids
releasing the pipe reset while HQDs may still be active.

Legacy (non-RS64) path: read CP_MEC_CNTL to build the reset mask instead
of reusing the value that was read from CP_MEC_RS64_CNTL.

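Wire reset_kcq into the new amdgpu_gfx_mec_pipe_reset_{prepare,
recover_queues} helpers so that sibling KCQs on the same pipe are stopped,
restarted, and have their fences handled correctly. If the pipe reset
itself fails, restart the schedulers via
amdgpu_gfx_mec_pipe_restart_schedulers() before returning the error.

Also demote the pipe-reset status message from dev_info to dev_dbg.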

Suggested-by: Manu Rastogi <[email protected]>
Suggested-by: Alex Deucher <[email protected]>
Signed-off-by: Jesse Zhang <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 151 +++++++++++++++----------
 1 file changed, 93 insertions(+), 58 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index fabdbbd0abb7..0e79336807de 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -6874,11 +6874,34 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring,
        return amdgpu_ring_reset_helper_end(ring, timedout_fence);
 }
 
+/* Clear all HQDs on (me, pipe); caller must hold adev->srbm_mutex with pipe reset asserted. */
+static void gfx_v11_0_clear_hqds_on_mec_pipe(struct amdgpu_device *adev, u32 
me,
+                                            u32 pipe)
+{
+       unsigned int q;
+       int j;
+
+       for (q = 0; q < adev->gfx.mec.num_queue_per_pipe; q++) {
+               soc21_grbm_select(adev, me, pipe, q, 0);
+               WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0);
+               if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) { /* still set after write */
+                       WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1);
+                       for (j = 0; j < adev->usec_timeout; j++) {
+                               if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 
1))
+                                       break;
+                               udelay(1); /* 1 us per poll, at most usec_timeout polls */
+                       } /* NOTE(review): a timeout here is not reported */
+               }
+
+               WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0); /* done for every queue */
+       }
+}
+
 static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring)
 {
 
        struct amdgpu_device *adev = ring->adev;
-       uint32_t reset_pipe = 0, clean_pipe = 0;
+       uint32_t reset_val, clean_val;
        int r;
 
        if (!gfx_v11_pipe_reset_support(adev))
@@ -6888,69 +6911,73 @@ static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring)
        mutex_lock(&adev->srbm_mutex);
        soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
 
-       reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
-       clean_pipe = reset_pipe;
-
        if (adev->gfx.rs64_enable) {
+               reset_val = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
+               clean_val = reset_val;
 
                switch (ring->pipe) {
                case 0:
-                       reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
-                                                  MEC_PIPE0_RESET, 1);
-                       clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
-                                                  MEC_PIPE0_RESET, 0);
+                       reset_val = REG_SET_FIELD(reset_val, CP_MEC_RS64_CNTL,
+                                                 MEC_PIPE0_RESET, 1);
+                       clean_val = REG_SET_FIELD(clean_val, CP_MEC_RS64_CNTL,
+                                                 MEC_PIPE0_RESET, 0);
                        break;
                case 1:
-                       reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
-                                                  MEC_PIPE1_RESET, 1);
-                       clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
-                                                  MEC_PIPE1_RESET, 0);
+                       reset_val = REG_SET_FIELD(reset_val, CP_MEC_RS64_CNTL,
+                                                 MEC_PIPE1_RESET, 1);
+                       clean_val = REG_SET_FIELD(clean_val, CP_MEC_RS64_CNTL,
+                                                 MEC_PIPE1_RESET, 0);
                        break;
                case 2:
-                       reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
-                                                  MEC_PIPE2_RESET, 1);
-                       clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
-                                                  MEC_PIPE2_RESET, 0);
+                       reset_val = REG_SET_FIELD(reset_val, CP_MEC_RS64_CNTL,
+                                                 MEC_PIPE2_RESET, 1);
+                       clean_val = REG_SET_FIELD(clean_val, CP_MEC_RS64_CNTL,
+                                                 MEC_PIPE2_RESET, 0);
                        break;
                case 3:
-                       reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
-                                                  MEC_PIPE3_RESET, 1);
-                       clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
-                                                  MEC_PIPE3_RESET, 0);
+                       reset_val = REG_SET_FIELD(reset_val, CP_MEC_RS64_CNTL,
+                                                 MEC_PIPE3_RESET, 1);
+                       clean_val = REG_SET_FIELD(clean_val, CP_MEC_RS64_CNTL,
+                                                 MEC_PIPE3_RESET, 0);
                        break;
                default:
                        break;
                }
-               WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe);
-               WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe);
+               WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_val);
+               gfx_v11_0_clear_hqds_on_mec_pipe(adev, ring->me, ring->pipe);
+               soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+               WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_val);
                r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR) << 2) -
                                        RS64_FW_UC_START_ADDR_LO;
        } else {
+               reset_val = RREG32_SOC15(GC, 0, regCP_MEC_CNTL);
+               clean_val = reset_val;
+
                if (ring->me == 1) {
                        switch (ring->pipe) {
                        case 0:
-                               reset_pipe = REG_SET_FIELD(reset_pipe, 
CP_MEC_CNTL,
-                                                          MEC_ME1_PIPE0_RESET, 
1);
-                               clean_pipe = REG_SET_FIELD(clean_pipe, 
CP_MEC_CNTL,
-                                                          MEC_ME1_PIPE0_RESET, 
0);
+                               reset_val = REG_SET_FIELD(reset_val, 
CP_MEC_CNTL,
+                                                         MEC_ME1_PIPE0_RESET, 
1);
+                               clean_val = REG_SET_FIELD(clean_val, 
CP_MEC_CNTL,
+                                                         MEC_ME1_PIPE0_RESET, 
0);
                                break;
                        case 1:
-                               reset_pipe = REG_SET_FIELD(reset_pipe, 
CP_MEC_CNTL,
-                                                          MEC_ME1_PIPE1_RESET, 
1);
-                               clean_pipe = REG_SET_FIELD(clean_pipe, 
CP_MEC_CNTL,
-                                                          MEC_ME1_PIPE1_RESET, 
0);
+                               reset_val = REG_SET_FIELD(reset_val, 
CP_MEC_CNTL,
+                                                         MEC_ME1_PIPE1_RESET, 
1);
+                               clean_val = REG_SET_FIELD(clean_val, 
CP_MEC_CNTL,
+                                                         MEC_ME1_PIPE1_RESET, 
0);
                                break;
                        case 2:
-                               reset_pipe = REG_SET_FIELD(reset_pipe, 
CP_MEC_CNTL,
-                                                          MEC_ME1_PIPE2_RESET, 
1);
-                               clean_pipe = REG_SET_FIELD(clean_pipe, 
CP_MEC_CNTL,
-                                                          MEC_ME1_PIPE2_RESET, 
0);
+                               reset_val = REG_SET_FIELD(reset_val, 
CP_MEC_CNTL,
+                                                         MEC_ME1_PIPE2_RESET, 
1);
+                               clean_val = REG_SET_FIELD(clean_val, 
CP_MEC_CNTL,
+                                                         MEC_ME1_PIPE2_RESET, 
0);
                                break;
                        case 3:
-                               reset_pipe = REG_SET_FIELD(reset_pipe, 
CP_MEC_CNTL,
-                                                          MEC_ME1_PIPE3_RESET, 
1);
-                               clean_pipe = REG_SET_FIELD(clean_pipe, 
CP_MEC_CNTL,
-                                                          MEC_ME1_PIPE3_RESET, 
0);
+                               reset_val = REG_SET_FIELD(reset_val, 
CP_MEC_CNTL,
+                                                         MEC_ME1_PIPE3_RESET, 
1);
+                               clean_val = REG_SET_FIELD(clean_val, 
CP_MEC_CNTL,
+                                                         MEC_ME1_PIPE3_RESET, 
0);
                                break;
                        default:
                                break;
@@ -6959,36 +6986,38 @@ static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring)
                } else {
                        switch (ring->pipe) {
                        case 0:
-                               reset_pipe = REG_SET_FIELD(reset_pipe, 
CP_MEC_CNTL,
-                                                          MEC_ME2_PIPE0_RESET, 
1);
-                               clean_pipe = REG_SET_FIELD(clean_pipe, 
CP_MEC_CNTL,
-                                                          MEC_ME2_PIPE0_RESET, 
0);
+                               reset_val = REG_SET_FIELD(reset_val, 
CP_MEC_CNTL,
+                                                         MEC_ME2_PIPE0_RESET, 
1);
+                               clean_val = REG_SET_FIELD(clean_val, 
CP_MEC_CNTL,
+                                                         MEC_ME2_PIPE0_RESET, 
0);
                                break;
                        case 1:
-                               reset_pipe = REG_SET_FIELD(reset_pipe, 
CP_MEC_CNTL,
-                                                          MEC_ME2_PIPE1_RESET, 
1);
-                               clean_pipe = REG_SET_FIELD(clean_pipe, 
CP_MEC_CNTL,
-                                                          MEC_ME2_PIPE1_RESET, 
0);
+                               reset_val = REG_SET_FIELD(reset_val, 
CP_MEC_CNTL,
+                                                         MEC_ME2_PIPE1_RESET, 
1);
+                               clean_val = REG_SET_FIELD(clean_val, 
CP_MEC_CNTL,
+                                                         MEC_ME2_PIPE1_RESET, 
0);
                                break;
                        case 2:
-                               reset_pipe = REG_SET_FIELD(reset_pipe, 
CP_MEC_CNTL,
-                                                          MEC_ME2_PIPE2_RESET, 
1);
-                               clean_pipe = REG_SET_FIELD(clean_pipe, 
CP_MEC_CNTL,
-                                                          MEC_ME2_PIPE2_RESET, 
0);
+                               reset_val = REG_SET_FIELD(reset_val, 
CP_MEC_CNTL,
+                                                         MEC_ME2_PIPE2_RESET, 
1);
+                               clean_val = REG_SET_FIELD(clean_val, 
CP_MEC_CNTL,
+                                                         MEC_ME2_PIPE2_RESET, 
0);
                                break;
                        case 3:
-                               reset_pipe = REG_SET_FIELD(reset_pipe, 
CP_MEC_CNTL,
-                                                          MEC_ME2_PIPE3_RESET, 
1);
-                               clean_pipe = REG_SET_FIELD(clean_pipe, 
CP_MEC_CNTL,
-                                                          MEC_ME2_PIPE3_RESET, 
0);
+                               reset_val = REG_SET_FIELD(reset_val, 
CP_MEC_CNTL,
+                                                         MEC_ME2_PIPE3_RESET, 
1);
+                               clean_val = REG_SET_FIELD(clean_val, 
CP_MEC_CNTL,
+                                                         MEC_ME2_PIPE3_RESET, 
0);
                                break;
                        default:
                                break;
                        }
                        /* mec2 fw pc: CP:CP_MEC2_INSTR_PNTR */
                }
-               WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe);
-               WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe);
+               WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_val);
+               gfx_v11_0_clear_hqds_on_mec_pipe(adev, ring->me, ring->pipe);
+               soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+               WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_val);
                r = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_MEC1_INSTR_PNTR));
        }
 
@@ -6996,7 +7025,7 @@ static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring)
        mutex_unlock(&adev->srbm_mutex);
        gfx_v11_0_unset_safe_mode(adev, 0);
 
-       dev_info(adev->dev, "The ring %s pipe resets to MEC FW start PC: %s\n", 
ring->name,
+       dev_dbg(adev->dev, "The ring %s pipe resets to MEC FW start PC: %s\n", 
ring->name,
                        r == 0 ? "successfully" : "failed");
        /*FIXME:Sometimes driver can't cache the MEC firmware start PC 
correctly, so the pipe
         * reset status relies on the compute ring test result.
@@ -7016,9 +7045,15 @@ static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring,
        r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true, 0);
        if (r) {
                dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe 
reset\n", r);
+               amdgpu_gfx_mec_pipe_reset_prepare(adev, ring);
                r = gfx_v11_0_reset_compute_pipe(ring);
-               if (r)
+               if (r) {
+                       amdgpu_gfx_mec_pipe_restart_schedulers(adev, ring->me, 
ring->pipe,
+                                                              ring->xcc_id);
                        return r;
+               }
+               return amdgpu_gfx_mec_pipe_reset_recover_queues(adev, ring, 
timedout_fence,
+                                                               
gfx_v11_0_kcq_init_queue);
        }
 
        r = gfx_v11_0_kcq_init_queue(ring, true);
-- 
2.49.0

Reply via email to