From: Daniele Ceraolo Spurio <daniele.ceraolospu...@intel.com>

The hangcheck logic will not flag a hang if acthd keeps increasing.
However, if a malformed batch jumps to an invalid offset in the ppgtt it
can potentially continue executing through the whole address space
without triggering the hangcheck mechanism.

This patch adds a test to simulate the issue. I've kept the test running
for more than 10 minutes before killing it on a BDW and no hang occurred.
I've sampled i915_hangcheck_info a few times during the run and got the
following:

Hangcheck active, fires in 468ms
render ring:
        seqno = fffff55e [current fffff55e]
        ACTHD = 0x47df685ecc [current 0x4926b81d90]
        max ACTHD = 0x47df685ecc
        score = 0
        action = 2
        instdone read = 0xffd7ffff 0xffffffff 0xffffffff 0xffffffff
        instdone accu = 0x00000000 0x00000000 0x00000000 0x00000000

Hangcheck active, fires in 424ms
render ring:
        seqno = fffff55e [current fffff55e]
        ACTHD = 0x6c953d3a34 [current 0x6de5e76fa4]
        max ACTHD = 0x6c953d3a34
        score = 0
        action = 2
        instdone read = 0xffd7ffff 0xffffffff 0xffffffff 0xffffffff
        instdone accu = 0x00000000 0x00000000 0x00000000 0x00000000

Hangcheck active, fires in 1692ms
render ring:
        seqno = fffff55e [current fffff55e]
        ACTHD = 0x1f49b0366dc [current 0x1f4dcbd88ec]
        max ACTHD = 0x1f49b0366dc
        score = 0
        action = 2
        instdone read = 0xffd7ffff 0xffffffff 0xffffffff 0xffffffff
        instdone accu = 0x00000000 0x00000000 0x00000000 0x00000000

v2: use the new gem_wait() function (Chris)

v3: switch to unterminated batch and rename test, remove redundant
    check, update test requirements (Chris), update top comment

Cc: Mika Kuoppala <mika.kuopp...@linux.intel.com>
Cc: Arun Siluvery <arun.siluv...@linux.intel.com>
Cc: Chris Wilson <ch...@chris-wilson.co.uk>
Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospu...@intel.com>
---
 tests/drv_hangman.c | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/tests/drv_hangman.c b/tests/drv_hangman.c
index 8a465cf..2360f26 100644
--- a/tests/drv_hangman.c
+++ b/tests/drv_hangman.c
@@ -288,6 +288,42 @@ static void test_error_state_capture(unsigned ring_id,
        check_error_state(gen, cmd_parser, ring_name, offset);
 }
 
+/* Submit a batch with no terminator so that execution runs on into an
+ * uninitialised area of the ppgtt, then assert that gem_wait() on the batch
+ * returns 0 within the timeout. This is particularly relevant if 48b ppgtt is
+ * enabled: the ppgtt is massively bigger than in the 32b case and takes a lot
+ * longer to wrap, so the acthd can potentially keep increasing for a long time.
+ */
+#define NSEC_PER_SEC   1000000000LL /* long long: 100x overflows a 32b long */
+static void hangcheck_unterminated(void)
+{
+       int fd;
+       /* timeout needs to be greater than ~5*hangcheck */
+       int64_t timeout_ns = 100 * NSEC_PER_SEC; /* 100 seconds */
+       struct drm_i915_gem_execbuffer2 execbuf;
+       struct drm_i915_gem_exec_object2 gem_exec;
+       uint32_t handle;
+
+       fd = drm_open_driver(DRIVER_INTEL);
+       igt_require(gem_uses_full_ppgtt(fd));
+       igt_require_hang_ring(fd, 0);
+
+       handle = gem_create(fd, 4096);
+
+       memset(&gem_exec, 0, sizeof(gem_exec));
+       gem_exec.handle = handle;
+
+       memset(&execbuf, 0, sizeof(execbuf));
+       execbuf.buffers_ptr = (uintptr_t)&gem_exec;
+       execbuf.buffer_count = 1;
+       execbuf.batch_len = 8;
+
+       gem_execbuf(fd, &execbuf);
+       igt_assert_eq(gem_wait(fd, handle, &timeout_ns), 0);
+
+       close(fd);
+}
+
 igt_main
 {
        const struct intel_execution_engine *e;
@@ -314,4 +350,7 @@ igt_main
                        test_error_state_capture(e->exec_id | e->flags,
                                                 e->full_name);
        }
+
+       igt_subtest("hangcheck-unterminated")
+               hangcheck_unterminated();
 }
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to