Re: [Intel-gfx] [PATCH 14/15] drm/i915: Disable per-engine reset for Broxton

2017-07-17 Thread Michel Thierry

On 17/07/17 02:11, Chris Wilson wrote:

Triggering a GPU reset for one engine affects another, notably
corrupting the context status buffer (CSB) effectively losing track of
inflight requests.

Adding a few printks:
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index ad41836fa5e5..a969456bc0fa 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1953,6 +1953,7 @@ int i915_reset_engine(struct intel_engine_cs *engine)
 goto out;
 }

+   pr_err("Resetting %s\n", engine->name);
 ret = intel_gpu_reset(engine->i915, intel_engine_flag(engine));
 if (ret) {
 /* If we fail here, we expect to fallback to a global reset */
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 716e5c9ea222..a72bc35d0870 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -355,6 +355,7 @@ static void execlists_submit_ports(struct intel_engine_cs 
*engine)
 execlists_context_status_change(rq, 
INTEL_CONTEXT_SCHEDULE_IN);
 port_set(&port[n], port_pack(rq, count));
 desc = execlists_update_context(rq);
+   pr_err("%s: in (rq=%x) ctx=%d\n", engine->name, 
rq->global_seqno, upper_32_bits(desc));
 GEM_DEBUG_EXEC(port[n].context_id = 
upper_32_bits(desc));
 } else {
 GEM_BUG_ON(!n);
@@ -594,9 +595,23 @@ static void intel_lrc_irq_handler(unsigned long data)
 if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
 continue;

+   pr_err("%s: out CSB (%x head=%d, tail=%d), ctx=%d, 
rq=%d\n",
+   engine->name,
+   readl(csb_mmio),
+   head, tail,
+   readl(buf+2*head+1),
+   port->context_id);
+
 /* Check the context/desc id for this event matches */
-   GEM_DEBUG_BUG_ON(readl(buf + 2 * head + 1) !=
-port->context_id);
+   if (readl(buf + 2 * head + 1) != port->context_id) {
+   pr_err("%s: BUG CSB (%x head=%d, tail=%d), ctx=%d, 
rq=%d\n",
+   engine->name,
+   readl(csb_mmio),
+   head, tail,
+   readl(buf+2*head+1),
+   port->context_id);
+   BUG();
+   }

 rq = port_unpack(port, &count);
 GEM_BUG_ON(count == 0);

Results in:

[ 6423.006602] Resetting rcs0
[ 6423.009080] rcs0: in (rq=fe70) ctx=1
[ 6423.009216] rcs0: in (rq=fe6f) ctx=3
[ 6423.009542] rcs0: out CSB (2 head=1, tail=2), ctx=3, rq=3
[ 6423.009619] Resetting bcs0
[ 6423.009980] rcs0: BUG CSB (0 head=1, tail=2), ctx=0, rq=3

Note that this bug may affect all machines and not just Broxton;
Broxton is just the first machine on which I have confirmed this bug.


Hopefully this is just broxton being broxton... I think I already sent 
this, but anyway...


Acked-by: Michel Thierry 



Fixes: 142bc7d99bcf ("drm/i915: Modify error handler for per engine hang 
recovery")
Signed-off-by: Chris Wilson 
Cc: Mika Kuoppala 
Cc: Michel Thierry 
---
  drivers/gpu/drm/i915/i915_pci.c | 1 +
  1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index a1e6b696bcfa..09d97e0990b7 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -398,6 +398,7 @@ static const struct intel_device_info intel_broxton_info = {
GEN9_LP_FEATURES,
.platform = INTEL_BROXTON,
.ddb_size = 512,
+   .has_reset_engine = false,
  };
  
  static const struct intel_device_info intel_geminilake_info = {



___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 14/15] drm/i915: Disable per-engine reset for Broxton

2017-07-17 Thread Chris Wilson
Triggering a GPU reset for one engine affects another, notably
corrupting the context status buffer (CSB) effectively losing track of
inflight requests.

Adding a few printks:
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index ad41836fa5e5..a969456bc0fa 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1953,6 +1953,7 @@ int i915_reset_engine(struct intel_engine_cs *engine)
goto out;
}

+   pr_err("Resetting %s\n", engine->name);
ret = intel_gpu_reset(engine->i915, intel_engine_flag(engine));
if (ret) {
/* If we fail here, we expect to fallback to a global reset */
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 716e5c9ea222..a72bc35d0870 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -355,6 +355,7 @@ static void execlists_submit_ports(struct intel_engine_cs 
*engine)
execlists_context_status_change(rq, 
INTEL_CONTEXT_SCHEDULE_IN);
port_set(&port[n], port_pack(rq, count));
desc = execlists_update_context(rq);
+   pr_err("%s: in (rq=%x) ctx=%d\n", engine->name, 
rq->global_seqno, upper_32_bits(desc));
GEM_DEBUG_EXEC(port[n].context_id = 
upper_32_bits(desc));
} else {
GEM_BUG_ON(!n);
@@ -594,9 +595,23 @@ static void intel_lrc_irq_handler(unsigned long data)
if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
continue;

+   pr_err("%s: out CSB (%x head=%d, tail=%d), ctx=%d, 
rq=%d\n",
+   engine->name,
+   readl(csb_mmio),
+   head, tail,
+   readl(buf+2*head+1),
+   port->context_id);
+
/* Check the context/desc id for this event matches */
-   GEM_DEBUG_BUG_ON(readl(buf + 2 * head + 1) !=
-port->context_id);
+   if (readl(buf + 2 * head + 1) != port->context_id) {
+   pr_err("%s: BUG CSB (%x head=%d, tail=%d), 
ctx=%d, rq=%d\n",
+   engine->name,
+   readl(csb_mmio),
+   head, tail,
+   readl(buf+2*head+1),
+   port->context_id);
+   BUG();
+   }

rq = port_unpack(port, &count);
GEM_BUG_ON(count == 0);

Results in:

[ 6423.006602] Resetting rcs0
[ 6423.009080] rcs0: in (rq=fe70) ctx=1
[ 6423.009216] rcs0: in (rq=fe6f) ctx=3
[ 6423.009542] rcs0: out CSB (2 head=1, tail=2), ctx=3, rq=3
[ 6423.009619] Resetting bcs0
[ 6423.009980] rcs0: BUG CSB (0 head=1, tail=2), ctx=0, rq=3

Note that this bug may affect all machines and not just Broxton;
Broxton is just the first machine on which I have confirmed this bug.

Fixes: 142bc7d99bcf ("drm/i915: Modify error handler for per engine hang 
recovery")
Signed-off-by: Chris Wilson 
Cc: Mika Kuoppala 
Cc: Michel Thierry 
---
 drivers/gpu/drm/i915/i915_pci.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index a1e6b696bcfa..09d97e0990b7 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -398,6 +398,7 @@ static const struct intel_device_info intel_broxton_info = {
GEN9_LP_FEATURES,
.platform = INTEL_BROXTON,
.ddb_size = 512,
+   .has_reset_engine = false,
 };
 
 static const struct intel_device_info intel_geminilake_info = {
-- 
2.13.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx