From: Tvrtko Ursulin <tvrtko.ursu...@intel.com>

Two simple selftests which test that both GT and engine workarounds are
not lost after either a full GPU reset, or after the per-engine ones.

(Including checks that one engine reset is not affecting workarounds not
belonging to itself.)

v2:
 * Rebase for series refactoring.
 * Add spinner for actual engine reset!
 * Add idle reset test as well. (Chris Wilson)
 * Share existing global_reset_lock. (Chris Wilson)

v3:
 * intel_engine_verify_workarounds can be static.
 * API rename. (Chris Wilson)
 * Move global reset lock out of the loop. (Chris Wilson)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursu...@intel.com>
Reviewed-by: Chris Wilson <ch...@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile                 |   1 +
 drivers/gpu/drm/i915/intel_workarounds.c      |   6 +
 drivers/gpu/drm/i915/selftests/igt_reset.c    |  44 ++++++
 drivers/gpu/drm/i915/selftests/igt_reset.h    |  15 ++
 .../gpu/drm/i915/selftests/intel_hangcheck.c  |  51 ++-----
 .../drm/i915/selftests/intel_workarounds.c    | 142 +++++++++++++++++-
 6 files changed, 212 insertions(+), 47 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/selftests/igt_reset.c
 create mode 100644 drivers/gpu/drm/i915/selftests/igt_reset.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 50a8fa8fce64..19b5fe5016bf 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -166,6 +166,7 @@ i915-$(CONFIG_DRM_I915_SELFTEST) += \
        selftests/i915_random.o \
        selftests/i915_selftest.o \
        selftests/igt_flush_test.o \
+       selftests/igt_reset.o \
        selftests/igt_spinner.o
 
 # virtual gpu code
diff --git a/drivers/gpu/drm/i915/intel_workarounds.c 
b/drivers/gpu/drm/i915/intel_workarounds.c
index 161ac61058a8..e7b0c6b98d94 100644
--- a/drivers/gpu/drm/i915/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/intel_workarounds.c
@@ -1302,5 +1302,11 @@ void intel_engine_apply_workarounds(struct 
intel_engine_cs *engine)
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+static bool intel_engine_verify_workarounds(struct intel_engine_cs *engine,
+                                           const char *from)
+{
+       return wa_list_verify(engine->i915, &engine->wa_list, from);
+}
+
 #include "selftests/intel_workarounds.c"
 #endif
diff --git a/drivers/gpu/drm/i915/selftests/igt_reset.c 
b/drivers/gpu/drm/i915/selftests/igt_reset.c
new file mode 100644
index 000000000000..208a966da8ca
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/igt_reset.c
@@ -0,0 +1,44 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#include "igt_reset.h"
+
+#include "../i915_drv.h"
+#include "../intel_ringbuffer.h"
+
+void igt_global_reset_lock(struct drm_i915_private *i915)
+{
+       struct intel_engine_cs *engine;
+       enum intel_engine_id id;
+
+       pr_debug("%s: current gpu_error=%08lx\n",
+                __func__, i915->gpu_error.flags);
+
+       while (test_and_set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags))
+               wait_event(i915->gpu_error.reset_queue,
+                          !test_bit(I915_RESET_BACKOFF,
+                                    &i915->gpu_error.flags));
+
+       for_each_engine(engine, i915, id) {
+               while (test_and_set_bit(I915_RESET_ENGINE + id,
+                                       &i915->gpu_error.flags))
+                       wait_on_bit(&i915->gpu_error.flags,
+                                   I915_RESET_ENGINE + id,
+                                   TASK_UNINTERRUPTIBLE);
+       }
+}
+
+void igt_global_reset_unlock(struct drm_i915_private *i915)
+{
+       struct intel_engine_cs *engine;
+       enum intel_engine_id id;
+
+       for_each_engine(engine, i915, id)
+               clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
+
+       clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
+       wake_up_all(&i915->gpu_error.reset_queue);
+}
diff --git a/drivers/gpu/drm/i915/selftests/igt_reset.h 
b/drivers/gpu/drm/i915/selftests/igt_reset.h
new file mode 100644
index 000000000000..5f0234d045d5
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/igt_reset.h
@@ -0,0 +1,15 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#ifndef __I915_SELFTESTS_IGT_RESET_H__
+#define __I915_SELFTESTS_IGT_RESET_H__
+
+#include "../i915_drv.h"
+
+void igt_global_reset_lock(struct drm_i915_private *i915);
+void igt_global_reset_unlock(struct drm_i915_private *i915);
+
+#endif
diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c 
b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index defe671130ab..3ac53496127b 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -27,6 +27,7 @@
 #include "../i915_selftest.h"
 #include "i915_random.h"
 #include "igt_flush_test.h"
+#include "igt_reset.h"
 #include "igt_wedge_me.h"
 
 #include "mock_context.h"
@@ -348,40 +349,6 @@ static int igt_hang_sanitycheck(void *arg)
        return err;
 }
 
-static void global_reset_lock(struct drm_i915_private *i915)
-{
-       struct intel_engine_cs *engine;
-       enum intel_engine_id id;
-
-       pr_debug("%s: current gpu_error=%08lx\n",
-                __func__, i915->gpu_error.flags);
-
-       while (test_and_set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags))
-               wait_event(i915->gpu_error.reset_queue,
-                          !test_bit(I915_RESET_BACKOFF,
-                                    &i915->gpu_error.flags));
-
-       for_each_engine(engine, i915, id) {
-               while (test_and_set_bit(I915_RESET_ENGINE + id,
-                                       &i915->gpu_error.flags))
-                       wait_on_bit(&i915->gpu_error.flags,
-                                   I915_RESET_ENGINE + id,
-                                   TASK_UNINTERRUPTIBLE);
-       }
-}
-
-static void global_reset_unlock(struct drm_i915_private *i915)
-{
-       struct intel_engine_cs *engine;
-       enum intel_engine_id id;
-
-       for_each_engine(engine, i915, id)
-               clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
-
-       clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
-       wake_up_all(&i915->gpu_error.reset_queue);
-}
-
 static int igt_global_reset(void *arg)
 {
        struct drm_i915_private *i915 = arg;
@@ -390,7 +357,7 @@ static int igt_global_reset(void *arg)
 
        /* Check that we can issue a global GPU reset */
 
-       global_reset_lock(i915);
+       igt_global_reset_lock(i915);
        set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags);
 
        mutex_lock(&i915->drm.struct_mutex);
@@ -405,7 +372,7 @@ static int igt_global_reset(void *arg)
        mutex_unlock(&i915->drm.struct_mutex);
 
        GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
-       global_reset_unlock(i915);
+       igt_global_reset_unlock(i915);
 
        if (i915_terminally_wedged(&i915->gpu_error))
                err = -EIO;
@@ -936,7 +903,7 @@ static int igt_reset_wait(void *arg)
 
        /* Check that we detect a stuck waiter and issue a reset */
 
-       global_reset_lock(i915);
+       igt_global_reset_lock(i915);
 
        mutex_lock(&i915->drm.struct_mutex);
        err = hang_init(&h, i915);
@@ -988,7 +955,7 @@ static int igt_reset_wait(void *arg)
        hang_fini(&h);
 unlock:
        mutex_unlock(&i915->drm.struct_mutex);
-       global_reset_unlock(i915);
+       igt_global_reset_unlock(i915);
 
        if (i915_terminally_wedged(&i915->gpu_error))
                return -EIO;
@@ -1066,7 +1033,7 @@ static int __igt_reset_evict_vma(struct drm_i915_private 
*i915,
 
        /* Check that we can recover an unbind stuck on a hanging request */
 
-       global_reset_lock(i915);
+       igt_global_reset_lock(i915);
 
        mutex_lock(&i915->drm.struct_mutex);
        err = hang_init(&h, i915);
@@ -1186,7 +1153,7 @@ static int __igt_reset_evict_vma(struct drm_i915_private 
*i915,
        hang_fini(&h);
 unlock:
        mutex_unlock(&i915->drm.struct_mutex);
-       global_reset_unlock(i915);
+       igt_global_reset_unlock(i915);
 
        if (i915_terminally_wedged(&i915->gpu_error))
                return -EIO;
@@ -1266,7 +1233,7 @@ static int igt_reset_queue(void *arg)
 
        /* Check that we replay pending requests following a hang */
 
-       global_reset_lock(i915);
+       igt_global_reset_lock(i915);
 
        mutex_lock(&i915->drm.struct_mutex);
        err = hang_init(&h, i915);
@@ -1397,7 +1364,7 @@ static int igt_reset_queue(void *arg)
        hang_fini(&h);
 unlock:
        mutex_unlock(&i915->drm.struct_mutex);
-       global_reset_unlock(i915);
+       igt_global_reset_unlock(i915);
 
        if (i915_terminally_wedged(&i915->gpu_error))
                return -EIO;
diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c 
b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
index 80396b3592f5..0c118d18f825 100644
--- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
@@ -6,6 +6,8 @@
 
 #include "../i915_selftest.h"
 
+#include "igt_flush_test.h"
+#include "igt_reset.h"
 #include "igt_spinner.h"
 #include "igt_wedge_me.h"
 #include "mock_context.h"
@@ -290,7 +292,6 @@ static int live_reset_whitelist(void *arg)
 {
        struct drm_i915_private *i915 = arg;
        struct intel_engine_cs *engine = i915->engine[RCS];
-       struct i915_gpu_error *error = &i915->gpu_error;
        struct whitelist w;
        int err = 0;
 
@@ -302,8 +303,7 @@ static int live_reset_whitelist(void *arg)
        if (!whitelist_build(engine, &w))
                return 0;
 
-       set_bit(I915_RESET_BACKOFF, &error->flags);
-       set_bit(I915_RESET_ENGINE + engine->id, &error->flags);
+       igt_global_reset_lock(i915);
 
        if (intel_has_reset_engine(i915)) {
                err = check_whitelist_across_reset(engine,
@@ -322,15 +322,147 @@ static int live_reset_whitelist(void *arg)
        }
 
 out:
-       clear_bit(I915_RESET_ENGINE + engine->id, &error->flags);
-       clear_bit(I915_RESET_BACKOFF, &error->flags);
+       igt_global_reset_unlock(i915);
        return err;
 }
 
+static bool verify_gt_engine_wa(struct drm_i915_private *i915, const char *str)
+{
+       struct intel_engine_cs *engine;
+       enum intel_engine_id id;
+       bool ok = true;
+
+       ok &= intel_gt_verify_workarounds(i915, str);
+
+       for_each_engine(engine, i915, id)
+               ok &= intel_engine_verify_workarounds(engine, str);
+
+       return ok;
+}
+
+static int
+live_gpu_reset_gt_engine_workarounds(void *arg)
+{
+       struct drm_i915_private *i915 = arg;
+       struct i915_gpu_error *error = &i915->gpu_error;
+       bool ok;
+
+       if (!intel_has_gpu_reset(i915))
+               return 0;
+
+       pr_info("Verifying after GPU reset...\n");
+
+       igt_global_reset_lock(i915);
+
+       ok = verify_gt_engine_wa(i915, "before reset");
+       if (!ok)
+               goto out;
+
+       intel_runtime_pm_get(i915);
+       set_bit(I915_RESET_HANDOFF, &error->flags);
+       i915_reset(i915, ALL_ENGINES, "live_workarounds");
+       intel_runtime_pm_put(i915);
+
+       ok = verify_gt_engine_wa(i915, "after reset");
+
+out:
+       igt_global_reset_unlock(i915);
+
+       return ok ? 0 : -ESRCH;
+}
+
+static int
+live_engine_reset_gt_engine_workarounds(void *arg)
+{
+       struct drm_i915_private *i915 = arg;
+       struct intel_engine_cs *engine;
+       struct i915_gem_context *ctx;
+       struct igt_spinner spin;
+       enum intel_engine_id id;
+       struct i915_request *rq;
+       int ret = 0;
+
+       if (!intel_has_reset_engine(i915))
+               return 0;
+
+       ctx = kernel_context(i915);
+       if (IS_ERR(ctx))
+               return PTR_ERR(ctx);
+
+       igt_global_reset_lock(i915);
+
+       for_each_engine(engine, i915, id) {
+               bool ok;
+
+               pr_info("Verifying after %s reset...\n", engine->name);
+
+               ok = verify_gt_engine_wa(i915, "before reset");
+               if (!ok) {
+                       ret = -ESRCH;
+                       goto err;
+               }
+
+               intel_runtime_pm_get(i915);
+               i915_reset_engine(engine, "live_workarounds");
+               intel_runtime_pm_put(i915);
+
+               ok = verify_gt_engine_wa(i915, "after idle reset");
+               if (!ok) {
+                       ret = -ESRCH;
+                       goto err;
+               }
+
+               ret = igt_spinner_init(&spin, i915);
+               if (ret)
+                       goto err;
+
+               intel_runtime_pm_get(i915);
+
+               rq = igt_spinner_create_request(&spin, ctx, engine, MI_NOOP);
+               if (IS_ERR(rq)) {
+                       ret = PTR_ERR(rq);
+                       igt_spinner_fini(&spin);
+                       goto err;
+               }
+
+               i915_request_add(rq);
+
+               if (!igt_wait_for_spinner(&spin, rq)) {
+                       pr_err("Spinner failed to start\n");
+                       igt_spinner_fini(&spin);
+                       ret = -ETIMEDOUT;
+                       goto err;
+               }
+
+               i915_reset_engine(engine, "live_workarounds");
+
+               intel_runtime_pm_put(i915);
+
+               igt_spinner_end(&spin);
+               igt_spinner_fini(&spin);
+
+               ok = verify_gt_engine_wa(i915, "after busy reset");
+               if (!ok) {
+                       ret = -ESRCH;
+                       goto err;
+               }
+       }
+
+err:
+       igt_global_reset_unlock(i915);
+       kernel_context_close(ctx);
+
+       igt_flush_test(i915, I915_WAIT_LOCKED);
+
+       return ret;
+}
+
 int intel_workarounds_live_selftests(struct drm_i915_private *i915)
 {
        static const struct i915_subtest tests[] = {
                SUBTEST(live_reset_whitelist),
+               SUBTEST(live_gpu_reset_gt_engine_workarounds),
+               SUBTEST(live_engine_reset_gt_engine_workarounds),
        };
        int err;
 
-- 
2.19.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to