nouveau: Turing channel preemption fix

Alistair Popple Thu, 29 Oct 2020 19:37:24 -0700

Previous hardware allowed an MMU fault to be generated by software to
trigger a context switch for engine recovery. Turing has the capability
to preempt all work from a specific runlist processor and removed the
registers currently used for triggering MMU faults. Attempting to access
these non-existent registers results in further errors, so use the
runlist preemption register instead.


Signed-off-by: Alistair Popple <apop...@nvidia.com>
---
 .../gpu/drm/nouveau/nvkm/engine/fifo/tu102.c  | 43 +------------------
 1 file changed, 2 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/tu102.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/tu102.c
index f2f20a25182f..14e5b70e0255 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/tu102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/tu102.c
@@ -144,7 +144,6 @@ tu102_fifo_recover_work(struct work_struct *w)
        for (todo = runm; runl = __ffs(todo), todo; todo &= ~BIT(runl))
                gk104_fifo_runlist_update(fifo, runl);
 
-       nvkm_wr32(device, 0x00262c, runm);
        nvkm_mask(device, 0x002630, runm, 0x00000000);
 }
 
@@ -240,13 +239,11 @@ tu102_fifo_recover_chan(struct nvkm_fifo *base, int chid)
 static void
 tu102_fifo_recover_engn(struct gk104_fifo *fifo, int engn)
 {
-       struct nvkm_engine *engine = fifo->engine[engn].engine;
        struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
        struct nvkm_device *device = subdev->device;
        const u32 runl = fifo->engine[engn].runl;
        const u32 engm = BIT(engn);
        struct gk104_fifo_engine_status status;
-       int mmui = -1;
 
        assert_spin_locked(&fifo->base.lock);
        if (fifo->recover.engm & engm)
@@ -263,44 +260,8 @@ tu102_fifo_recover_engn(struct gk104_fifo *fifo, int engn)
                tu102_fifo_recover_chan(&fifo->base, status.chan->id);
        }
 
-       /* Determine MMU fault ID for the engine, if we're not being
-        * called from the fault handler already.
-        */
-       if (!status.faulted && engine) {
-               mmui = nvkm_top_fault_id(device, engine->subdev.index);
-               if (mmui < 0) {
-                       const struct nvkm_enum *en = fifo->func->fault.engine;
-
-                       for (; en && en->name; en++) {
-                               if (en->data2 == engine->subdev.index) {
-                                       mmui = en->value;
-                                       break;
-                               }
-                       }
-               }
-               WARN_ON(mmui < 0);
-       }
-
-       /* Trigger a MMU fault for the engine.
-        *
-        * No good idea why this is needed, but nvgpu does something similar,
-        * and it makes recovery from CTXSW_TIMEOUT a lot more reliable.
-        */
-       if (mmui >= 0) {
-               nvkm_wr32(device, 0x002a30 + (engn * 0x04), 0x00000100 | mmui);
-
-               /* Wait for fault to trigger. */
-               nvkm_msec(device, 2000,
-                       gk104_fifo_engine_status(fifo, engn, &status);
-                       if (status.faulted)
-                               break;
-               );
-
-               /* Release MMU fault trigger, and ACK the fault. */
-               nvkm_wr32(device, 0x002a30 + (engn * 0x04), 0x00000000);
-               nvkm_wr32(device, 0x00259c, BIT(mmui));
-               nvkm_wr32(device, 0x002100, 0x10000000);
-       }
+       /* Preempt the runlist */
+       nvkm_wr32(device, 0x2638, BIT(runl));
 
        /* Schedule recovery. */
        nvkm_warn(subdev, "engine %d: scheduled for recovery\n", engn);
-- 
2.20.1

_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/nouveau

[Nouveau] [PATCH 5/5] drm/nouveau: Turing channel preemption fix

Reply via email to