Enable the scheduling timeout error interrupt and program it with a short
period so that it fires periodically, since the hardware can miss it under
certain conditions. Each time it fires, a channel-specific software counter
is advanced by the check period if the current channel hasn't advanced
since the previous check; otherwise the counter is restarted. The channel
is aborted once the counter exceeds the timeout limit, so the timeout is
only accurate to the granularity of the check period. The error notifier
is set to NOUVEAU_GEM_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT when this occurs.

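A new KEPLER_SET_CHANNEL_TIMEOUT method sets the per-channel timeout limit,
in milliseconds. The limit is initialised to -1 (the maximum u32 value), so
a channel is not aborted until a limit has been set. The interrupt period
(GRFIFO_TIMEOUT_CHECK_PERIOD_MS) is set to 100 ms.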

Signed-off-by: Konsta Hölttä <[email protected]>
---
 drm/nouveau/include/nvif/class.h     |  8 ++++
 drm/nouveau/nvkm/engine/fifo/gk104.c | 78 +++++++++++++++++++++++++++++++-----
 2 files changed, 75 insertions(+), 11 deletions(-)

diff --git a/drm/nouveau/include/nvif/class.h b/drm/nouveau/include/nvif/class.h
index 381c72d..f9a3647 100644
--- a/drm/nouveau/include/nvif/class.h
+++ b/drm/nouveau/include/nvif/class.h
@@ -620,18 +620,26 @@ struct fermi_a_zbc_depth_v0 {
        __u8  format;
        __u8  index;
        __u8  pad03[5];
        __u32 ds;
        __u32 l2;
 };
 
 #define KEPLER_SET_CHANNEL_PRIORITY                                        0x42 // XXX
+#define KEPLER_SET_CHANNEL_TIMEOUT                                         0x43 // XXX
+
 struct kepler_set_channel_priority_v0 {
        __u8  version;
 #define KEPLER_SET_CHANNEL_PRIORITY_LOW                                    0x00
 #define KEPLER_SET_CHANNEL_PRIORITY_MEDIUM                                 0x01
 #define KEPLER_SET_CHANNEL_PRIORITY_HIGH                                   0x02
        __u8 priority;
        __u8  pad03[6];
 };
 
+struct kepler_set_channel_timeout_v0 {
+       __u8  version;
+       __u8  pad03[3];
+       __u32 timeout_ms;
+};
+
 #endif
diff --git a/drm/nouveau/nvkm/engine/fifo/gk104.c b/drm/nouveau/nvkm/engine/fifo/gk104.c
index 2bab45e..15360a6 100644
--- a/drm/nouveau/nvkm/engine/fifo/gk104.c
+++ b/drm/nouveau/nvkm/engine/fifo/gk104.c
@@ -83,18 +83,25 @@ struct gk104_fifo_base {
 struct gk104_fifo_chan {
        struct nvkm_fifo_chan base;
        u32 engine;
        enum {
                STOPPED,
                RUNNING,
                KILLED
        } state;
+       struct {
+               u32 sum_ms;
+               u32 limit_ms;
+               u32 gpfifo_get;
+       } timeout;
 };
 
+#define GRFIFO_TIMEOUT_CHECK_PERIOD_MS 100
+
 /*******************************************************************************
  * FIFO channel objects
  ******************************************************************************/
 
 static void
 gk104_fifo_runlist_update(struct gk104_fifo_priv *priv, u32 engine)
 {
        struct nvkm_bar *bar = nvkm_bar(priv);
@@ -288,16 +295,21 @@ gk104_fifo_chan_ctor(struct nvkm_object *parent, struct nvkm_object *engine,
        nv_wo32(base, 0x94, 0x30000001);
        nv_wo32(base, 0x9c, 0x00000100);
        nv_wo32(base, 0xac, 0x0000001f);
        nv_wo32(base, 0xe8, chan->base.chid);
        nv_wo32(base, 0xb8, 0xf8000000);
        nv_wo32(base, 0xf8, 0x10003080); /* 0x002310 */
        nv_wo32(base, 0xfc, 0x10000010); /* 0x002350 */
        bar->flush(bar);
+
+       chan->timeout.sum_ms = 0;
+       chan->timeout.limit_ms = -1;
+       chan->timeout.gpfifo_get = 0;
+
        return 0;
 }
 
 static int
 gk104_fifo_chan_init(struct nvkm_object *object)
 {
        struct nvkm_gpuobj *base = nv_gpuobj(object->parent);
        struct gk104_fifo_priv *priv = (void *)object->engine;
@@ -379,21 +391,39 @@ gk104_fifo_chan_set_priority(struct nvkm_object *object, void *data, u32 size)
                        return -EINVAL;
                }
        }
 
        return ret;
 }
 
 int
+gk104_fifo_chan_set_timeout(struct nvkm_object *object, void *data, u32 size)
+{
+       struct gk104_fifo_chan *chan = (void *)object;
+       union {
+               struct kepler_set_channel_timeout_v0 v0;
+       } *args = data;
+       int ret;
+
+       if (nvif_unpack(args->v0, 0, 0, false)) {
+               chan->timeout.limit_ms = args->v0.timeout_ms;
+       }
+
+       return ret;
+}
+
+int
 gk104_fifo_chan_mthd(struct nvkm_object *object, u32 mthd, void *data, u32 size)
 {
        switch (mthd) {
        case KEPLER_SET_CHANNEL_PRIORITY:
                return gk104_fifo_chan_set_priority(object, data, size);
+       case KEPLER_SET_CHANNEL_TIMEOUT:
+               return gk104_fifo_chan_set_timeout(object, data, size);
        default:
                break;
        }
        return -EINVAL;
 }
 
 struct nvkm_ofuncs
 gk104_fifo_chan_ofuncs = {
@@ -604,61 +634,83 @@ gk104_fifo_intr_bind(struct gk104_fifo_priv *priv)
 }
 
 static const struct nvkm_enum
 gk104_fifo_sched_reason[] = {
        { 0x0a, "CTXSW_TIMEOUT" },
        {}
 };
 
+static bool
+gk104_fifo_update_timeout(struct gk104_fifo_priv *priv,
+               struct gk104_fifo_chan *chan, u32 dt)
+{
+       u32 gpfifo_get = nv_rd32(priv, 34);
+       if (gpfifo_get == chan->timeout.gpfifo_get) {
+               chan->timeout.sum_ms += dt;
+       } else {
+               chan->timeout.sum_ms = dt;
+       }
+
+       chan->timeout.gpfifo_get = gpfifo_get;
+
+       return chan->timeout.sum_ms > chan->timeout.limit_ms;
+}
+
 static void
 gk104_fifo_intr_sched_ctxsw(struct gk104_fifo_priv *priv)
 {
        struct nvkm_engine *engine;
        struct gk104_fifo_chan *chan;
        u32 engn;
 
        for (engn = 0; engn < ARRAY_SIZE(fifo_engine); engn++) {
                u32 stat = nv_rd32(priv, 0x002640 + (engn * 0x04));
                u32 busy = (stat & 0x80000000);
                u32 next = (stat & 0x07ff0000) >> 16;
-               u32 chsw = (stat & 0x00008000);
-               u32 save = (stat & 0x00004000);
-               u32 load = (stat & 0x00002000);
+               u32 cxsw = (stat & 0x0000e000) >> 13;
                u32 prev = (stat & 0x000007ff);
-               u32 chid = load ? next : prev;
-               (void)save;
+               /* if loading context, take next id */
+               u32 chid = cxsw == 5 ? next : prev;
 
-               if (busy && chsw) {
+               nv_error(priv, "ctxsw eng stat: %08x\n", stat);
+               /* doing context switch? */
+               if (busy && (cxsw >= 5 && cxsw <= 7)) {
                        if (!(chan = (void *)priv->base.channel[chid]))
                                continue;
                        if (!(engine = gk104_fifo_engine(priv, engn)))
                                continue;
 
-                       nvkm_fifo_eevent(&priv->base, chid,
-                                       NOUVEAU_GEM_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT);
-
-                       gk104_fifo_recover(priv, engine, chan);
+                       if (gk104_fifo_update_timeout(priv, chan,
+                                               GRFIFO_TIMEOUT_CHECK_PERIOD_MS)) {
+                               nvkm_fifo_eevent(&priv->base, chid,
+                                               NOUVEAU_GEM_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT);
+                               gk104_fifo_recover(priv, engine, chan);
+                       } else {
+                               nv_debug(priv, "fifo waiting for ctxsw %d ms on ch %d\n",
+                                               chan->timeout.sum_ms, chid);
+                       }
                }
        }
 }
 
 static void
 gk104_fifo_intr_sched(struct gk104_fifo_priv *priv)
 {
        u32 intr = nv_rd32(priv, 0x00254c);
        u32 code = intr & 0x000000ff;
        const struct nvkm_enum *en;
        char enunk[6] = "";
 
        en = nvkm_enum_find(gk104_fifo_sched_reason, code);
        if (!en)
                snprintf(enunk, sizeof(enunk), "UNK%02x", code);
 
-       nv_error(priv, "SCHED_ERROR [ %s ]\n", en ? en->name : enunk);
+       /* this is a normal situation, not so loud */
+       nv_debug(priv, "SCHED_ERROR [ %s ]\n", en ? en->name : enunk);
 
        switch (code) {
        case 0x0a:
                gk104_fifo_intr_sched_ctxsw(priv);
                break;
        default:
                break;
        }
@@ -1131,18 +1183,22 @@ gk104_fifo_init(struct nvkm_object *object)
        /* PBDMA[n].HCE */
        for (i = 0; i < priv->spoon_nr; i++) {
                nv_wr32(priv, 0x040148 + (i * 0x2000), 0xffffffff); /* INTR */
                nv_wr32(priv, 0x04014c + (i * 0x2000), 0xffffffff); /* INTREN */
        }
 
        nv_wr32(priv, 0x002254, 0x10000000 | priv->user.bar.offset >> 12);
 
+       /* enable interrupts */
        nv_wr32(priv, 0x002100, 0xffffffff);
        nv_wr32(priv, 0x002140, 0x7fffffff);
+
+       /* engine timeout */
+       nv_wr32(priv, 0x002a0c, 0x80000000 | (1000 * GRFIFO_TIMEOUT_CHECK_PERIOD_MS));
        return 0;
 }
 
 void
 gk104_fifo_dtor(struct nvkm_object *object)
 {
        struct gk104_fifo_priv *priv = (void *)object;
        int i;
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to