Final enablement patch for GPU hang detection using watchdog timeout.
Using the gem_context_setparam ioctl, users can specify the desired
timeout value in milliseconds, and the driver will do the conversion to
'timestamps'.

The _recommended_ default watchdog threshold for video engines is 60 ms,
since this has been _empirically determined_ to be a good compromise for
low-latency requirements and low rate of false positives. The default
register value is ~106ms and the theoretical max value (all 1s) is
353 seconds.

Signed-off-by: Tomas Elf <[email protected]>
Signed-off-by: Arun Siluvery <[email protected]>
Signed-off-by: Michel Thierry <[email protected]>
---
 drivers/gpu/drm/i915/i915_gem_context.c    | 46 ++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_context.h    |  3 ++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  8 +-----
 include/uapi/drm/i915_drm.h                |  1 +
 4 files changed, 51 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 99c46f4dbde6..c3748878e64c 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -440,6 +440,32 @@ i915_gem_context_create_gvt(struct drm_device *dev)
        return ctx;
 }
 
+void i915_context_watchdog_setup(struct i915_gem_context *ctx, u32 value_in_ms)
+{
+       /*
+        * Based on time out value (ms) calculate
+        * timer count thresholds needed based on core frequency.
+        */
+#define TIMER_MILLISECOND 1000
+
+       /*
+        * Timestamp timer resolution = 0.080 uSec,
+        * or 12500000 counts per second
+        */
+#define TIMESTAMP_CNTS_PER_SEC_80NS 12500000
+
+       ctx->watchdog_threshold =
+               ((value_in_ms) *
+                ((TIMESTAMP_CNTS_PER_SEC_80NS) / (TIMER_MILLISECOND)));
+
+       /*
+        * watchdog register must never be programmed to zero. This would
+        * cause the watchdog counter to exceed and not allow the engine to
+        * go into IDLE state
+        */
+       GEM_BUG_ON(ctx->watchdog_threshold == 0);
+}
+
 int i915_gem_context_init(struct drm_i915_private *dev_priv)
 {
        struct i915_gem_context *ctx;
@@ -1056,6 +1082,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device 
*dev, void *data,
        struct drm_i915_file_private *file_priv = file->driver_priv;
        struct drm_i915_gem_context_param *args = data;
        struct i915_gem_context *ctx;
+       struct intel_engine_cs *engine;
        int ret;
 
        ret = i915_mutex_lock_interruptible(dev);
@@ -1090,6 +1117,15 @@ int i915_gem_context_getparam_ioctl(struct drm_device 
*dev, void *data,
        case I915_CONTEXT_PARAM_BANNABLE:
                args->value = i915_gem_context_is_bannable(ctx);
                break;
+       case I915_CONTEXT_PARAM_WATCHDOG:
+               engine = to_i915(dev)->engine[VCS];
+               if (!engine->emit_start_watchdog)
+                       ret = -EINVAL;
+               else if (args->value)
+                       ret = -EINVAL;
+               else
+                       args->value = ctx->watchdog_threshold;
+               break;
        default:
                ret = -EINVAL;
                break;
@@ -1105,6 +1141,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device 
*dev, void *data,
        struct drm_i915_file_private *file_priv = file->driver_priv;
        struct drm_i915_gem_context_param *args = data;
        struct i915_gem_context *ctx;
+       struct intel_engine_cs *engine;
        int ret;
 
        ret = i915_mutex_lock_interruptible(dev);
@@ -1147,6 +1184,15 @@ int i915_gem_context_setparam_ioctl(struct drm_device 
*dev, void *data,
                else
                        i915_gem_context_clear_bannable(ctx);
                break;
+       case I915_CONTEXT_PARAM_WATCHDOG:
+               engine = to_i915(dev)->engine[VCS];
+               if (!engine->emit_start_watchdog)
+                       ret = -EINVAL;
+               else if (!args->value)
+                       ret = -EINVAL;
+               else
+                       i915_context_watchdog_setup(ctx, args->value);
+               break;
        default:
                ret = -EINVAL;
                break;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h 
b/drivers/gpu/drm/i915/i915_gem_context.h
index 0ac750b90f3d..133ed7b413aa 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -176,6 +176,9 @@ struct i915_gem_context {
        /** ban_score: Accumulated score of all hangs caused by this context. */
        int ban_score;
 
+       /** watchdog_threshold: hw watchdog threshold value, in clock counts */
+       u32 watchdog_threshold;
+
        /** remap_slice: Bitmask of cache lines that need remapping */
        u8 remap_slice;
 };
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 348d81c40e81..26c50a6d6158 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1419,12 +1419,6 @@ execbuf_submit(struct i915_execbuffer_params *params,
        bool watchdog_running = false;
        int ret;
 
-       /*
-        * NB: Place-holder until watchdog timeout is enabled through DRM
-        * interface
-        */
-       bool enable_watchdog = false;
-
        ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas);
        if (ret)
                return ret;
@@ -1488,7 +1482,7 @@ execbuf_submit(struct i915_execbuffer_params *params,
        }
 
        /* Start watchdog timer */
-       if (enable_watchdog) {
+       if (params->ctx->watchdog_threshold != 0) {
                if (!params->engine->emit_start_watchdog)
                        return -EINVAL;
 
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 3554495bef13..e318c4f53a9e 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1289,6 +1289,7 @@ struct drm_i915_gem_context_param {
 #define I915_CONTEXT_PARAM_GTT_SIZE    0x3
 #define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE    0x4
 #define I915_CONTEXT_PARAM_BANNABLE    0x5
+#define I915_CONTEXT_PARAM_WATCHDOG    0x6
        __u64 value;
 };
 
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to