Hold MEC pipe reset asserted, walk every queue on that (me, pipe) and tear
down CP_HQD_ACTIVE / CP_HQD_DEQUEUE_REQUEST via
gfx_v11_0_clear_hqds_on_mec_pipe(), then deassert reset. Avoids releasing
pipe reset while HQDs may still be active.
Legacy (non-RS64) path: read CP_MEC_CNTL for the reset mask instead of
reusing CP_MEC_RS64_CNTL state.
Wire reset_kcq into the new amdgpu_gfx_mec_pipe_reset_{prepare,
recover_queues} helpers so sibling KCQs on the same pipe are stopped,
restarted, and have their fences handled correctly.
Suggested-by: Manu Rastogi <[email protected]>
Suggested-by: Alex Deucher <[email protected]>
Signed-off-by: Jesse Zhang <[email protected]>
---
drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 151 +++++++++++++++----------
1 file changed, 93 insertions(+), 58 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index fabdbbd0abb7..0e79336807de 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -6874,11 +6874,34 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring,
return amdgpu_ring_reset_helper_end(ring, timedout_fence);
}
+/* Caller must hold adev->srbm_mutex; pipe reset must be asserted. */
+static void gfx_v11_0_clear_hqds_on_mec_pipe(struct amdgpu_device *adev, u32 me,
+					     u32 pipe)
+{
+ unsigned int q;
+ int j;
+
+ for (q = 0; q < adev->gfx.mec.num_queue_per_pipe; q++) {
+ soc21_grbm_select(adev, me, pipe, q, 0);
+ WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0);
+ if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) {
+ WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1);
+ for (j = 0; j < adev->usec_timeout; j++) {
+				if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
+					break;
+ udelay(1);
+ }
+ }
+
+ WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0);
+ }
+}
+
static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- uint32_t reset_pipe = 0, clean_pipe = 0;
+ uint32_t reset_val, clean_val;
int r;
if (!gfx_v11_pipe_reset_support(adev))
@@ -6888,69 +6911,73 @@ static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring)
mutex_lock(&adev->srbm_mutex);
soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
- reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
- clean_pipe = reset_pipe;
-
if (adev->gfx.rs64_enable) {
+ reset_val = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
+ clean_val = reset_val;
switch (ring->pipe) {
case 0:
- reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
- MEC_PIPE0_RESET, 1);
- clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
- MEC_PIPE0_RESET, 0);
+ reset_val = REG_SET_FIELD(reset_val, CP_MEC_RS64_CNTL,
+ MEC_PIPE0_RESET, 1);
+ clean_val = REG_SET_FIELD(clean_val, CP_MEC_RS64_CNTL,
+ MEC_PIPE0_RESET, 0);
break;
case 1:
- reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
- MEC_PIPE1_RESET, 1);
- clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
- MEC_PIPE1_RESET, 0);
+ reset_val = REG_SET_FIELD(reset_val, CP_MEC_RS64_CNTL,
+ MEC_PIPE1_RESET, 1);
+ clean_val = REG_SET_FIELD(clean_val, CP_MEC_RS64_CNTL,
+ MEC_PIPE1_RESET, 0);
break;
case 2:
- reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
- MEC_PIPE2_RESET, 1);
- clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
- MEC_PIPE2_RESET, 0);
+ reset_val = REG_SET_FIELD(reset_val, CP_MEC_RS64_CNTL,
+ MEC_PIPE2_RESET, 1);
+ clean_val = REG_SET_FIELD(clean_val, CP_MEC_RS64_CNTL,
+ MEC_PIPE2_RESET, 0);
break;
case 3:
- reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
- MEC_PIPE3_RESET, 1);
- clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
- MEC_PIPE3_RESET, 0);
+ reset_val = REG_SET_FIELD(reset_val, CP_MEC_RS64_CNTL,
+ MEC_PIPE3_RESET, 1);
+ clean_val = REG_SET_FIELD(clean_val, CP_MEC_RS64_CNTL,
+ MEC_PIPE3_RESET, 0);
break;
default:
break;
}
- WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe);
- WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_val);
+ gfx_v11_0_clear_hqds_on_mec_pipe(adev, ring->me, ring->pipe);
+ soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_val);
r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR) << 2) -
RS64_FW_UC_START_ADDR_LO;
} else {
+ reset_val = RREG32_SOC15(GC, 0, regCP_MEC_CNTL);
+ clean_val = reset_val;
+
if (ring->me == 1) {
switch (ring->pipe) {
case 0:
-				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
-							   MEC_ME1_PIPE0_RESET, 1);
-				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
-							   MEC_ME1_PIPE0_RESET, 0);
+				reset_val = REG_SET_FIELD(reset_val, CP_MEC_CNTL,
+							  MEC_ME1_PIPE0_RESET, 1);
+				clean_val = REG_SET_FIELD(clean_val, CP_MEC_CNTL,
+							  MEC_ME1_PIPE0_RESET, 0);
 				break;
 			case 1:
-				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
-							   MEC_ME1_PIPE1_RESET, 1);
-				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
-							   MEC_ME1_PIPE1_RESET, 0);
+				reset_val = REG_SET_FIELD(reset_val, CP_MEC_CNTL,
+							  MEC_ME1_PIPE1_RESET, 1);
+				clean_val = REG_SET_FIELD(clean_val, CP_MEC_CNTL,
+							  MEC_ME1_PIPE1_RESET, 0);
 				break;
 			case 2:
-				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
-							   MEC_ME1_PIPE2_RESET, 1);
-				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
-							   MEC_ME1_PIPE2_RESET, 0);
+				reset_val = REG_SET_FIELD(reset_val, CP_MEC_CNTL,
+							  MEC_ME1_PIPE2_RESET, 1);
+				clean_val = REG_SET_FIELD(clean_val, CP_MEC_CNTL,
+							  MEC_ME1_PIPE2_RESET, 0);
 				break;
 			case 3:
-				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
-							   MEC_ME1_PIPE3_RESET, 1);
-				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
-							   MEC_ME1_PIPE3_RESET, 0);
+				reset_val = REG_SET_FIELD(reset_val, CP_MEC_CNTL,
+							  MEC_ME1_PIPE3_RESET, 1);
+				clean_val = REG_SET_FIELD(clean_val, CP_MEC_CNTL,
+							  MEC_ME1_PIPE3_RESET, 0);
 				break;
break;
default:
break;
@@ -6959,36 +6986,38 @@ static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring)
} else {
switch (ring->pipe) {
case 0:
-				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
-							   MEC_ME2_PIPE0_RESET, 1);
-				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
-							   MEC_ME2_PIPE0_RESET, 0);
+				reset_val = REG_SET_FIELD(reset_val, CP_MEC_CNTL,
+							  MEC_ME2_PIPE0_RESET, 1);
+				clean_val = REG_SET_FIELD(clean_val, CP_MEC_CNTL,
+							  MEC_ME2_PIPE0_RESET, 0);
 				break;
 			case 1:
-				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
-							   MEC_ME2_PIPE1_RESET, 1);
-				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
-							   MEC_ME2_PIPE1_RESET, 0);
+				reset_val = REG_SET_FIELD(reset_val, CP_MEC_CNTL,
+							  MEC_ME2_PIPE1_RESET, 1);
+				clean_val = REG_SET_FIELD(clean_val, CP_MEC_CNTL,
+							  MEC_ME2_PIPE1_RESET, 0);
 				break;
 			case 2:
-				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
-							   MEC_ME2_PIPE2_RESET, 1);
-				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
-							   MEC_ME2_PIPE2_RESET, 0);
+				reset_val = REG_SET_FIELD(reset_val, CP_MEC_CNTL,
+							  MEC_ME2_PIPE2_RESET, 1);
+				clean_val = REG_SET_FIELD(clean_val, CP_MEC_CNTL,
+							  MEC_ME2_PIPE2_RESET, 0);
 				break;
 			case 3:
-				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
-							   MEC_ME2_PIPE3_RESET, 1);
-				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
-							   MEC_ME2_PIPE3_RESET, 0);
+				reset_val = REG_SET_FIELD(reset_val, CP_MEC_CNTL,
+							  MEC_ME2_PIPE3_RESET, 1);
+				clean_val = REG_SET_FIELD(clean_val, CP_MEC_CNTL,
+							  MEC_ME2_PIPE3_RESET, 0);
 				break;
break;
default:
break;
}
/* mec2 fw pc: CP:CP_MEC2_INSTR_PNTR */
}
- WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe);
- WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe);
+ WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_val);
+ gfx_v11_0_clear_hqds_on_mec_pipe(adev, ring->me, ring->pipe);
+ soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_val);
r = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_MEC1_INSTR_PNTR));
}
@@ -6996,7 +7025,7 @@ static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring)
mutex_unlock(&adev->srbm_mutex);
gfx_v11_0_unset_safe_mode(adev, 0);
-	dev_info(adev->dev, "The ring %s pipe resets to MEC FW start PC: %s\n", ring->name,
+	dev_dbg(adev->dev, "The ring %s pipe resets to MEC FW start PC: %s\n", ring->name,
 		 r == 0 ? "successfully" : "failed");
 	/*FIXME:Sometimes driver can't cache the MEC firmware start PC correctly, so the pipe
* reset status relies on the compute ring test result.
@@ -7016,9 +7045,15 @@ static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring,
r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true, 0);
if (r) {
 		dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe reset\n", r);
+ amdgpu_gfx_mec_pipe_reset_prepare(adev, ring);
r = gfx_v11_0_reset_compute_pipe(ring);
- if (r)
+ if (r) {
+			amdgpu_gfx_mec_pipe_restart_schedulers(adev, ring->me, ring->pipe,
+							       ring->xcc_id);
 			return r;
+		}
+		return amdgpu_gfx_mec_pipe_reset_recover_queues(adev, ring, timedout_fence,
+								gfx_v11_0_kcq_init_queue);
}
r = gfx_v11_0_kcq_init_queue(ring, true);
--
2.49.0