On Thu, Oct 24, 2019 at 6:40 AM Chris Wilson <ch...@chris-wilson.co.uk> wrote:
> Our existing behaviour is to allow contexts and their GPU requests to > persist past the point of closure until the requests are complete. This > allows clients to operate in a 'fire-and-forget' manner where they can > setup a rendering pipeline and hand it over to the display server and > immediately exiting. As the rendering pipeline is kept alive until > completion, the display server (or other consumer) can use the results > in the future and present them to the user. > > However, not all clients want this persistent behaviour and would prefer > that the contexts are cleaned up immediately upon closure. This ensures > that when clients are run without hangchecking, any GPU hang is > terminated with the process and does not continue to hog resources. > > By defining a context property to allow clients to control persistence > explicitly, we can remove the blanket advice to disable hangchecking > that seems to be far too prevalent. > Just to be clear, when you say "disable hangchecking" do you mean disabling it for all processes via a kernel parameter at boot time or a sysfs entry or similar? Or is there some mechanism whereby a context can request no hang checking? > The default behaviour for new controls is the legacy persistence mode. > New clients will have to opt out for immediate cleanup on context > closure. If the hangchecking modparam is disabled, so is persistent > context support -- all contexts will be terminated on closure. > What happens to fences when the context is cancelled? Is it the same behavior as we have today for when a GPU hang is detected and a context is banned? --Jason > Testcase: igt/gem_ctx_persistence > Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> > Cc: Joonas Lahtinen <joonas.lahti...@linux.intel.com> > Cc: MichaĆ Winiarski <michal.winiar...@intel.com> > Cc: Jon Bloomfield <jon.bloomfi...@intel.com> > Reviewed-by: Jon Bloomfield <jon.bloomfi...@intel.com> > Reviewed-by: Tvrtko Ursulin <tvrtko.ursu...@intel.com> > --- > drivers/gpu/drm/i915/gem/i915_gem_context.c | 50 ++++++++++++++++++- > drivers/gpu/drm/i915/gem/i915_gem_context.h | 15 ++++++ > .../gpu/drm/i915/gem/i915_gem_context_types.h | 1 + > .../gpu/drm/i915/gem/selftests/mock_context.c | 2 + > include/uapi/drm/i915_drm.h | 15 ++++++ > 5 files changed, 82 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c > b/drivers/gpu/drm/i915/gem/i915_gem_context.c > index 55f1f93c0925..0f1bbf5d1a11 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c > @@ -437,12 +437,39 @@ static void context_close(struct i915_gem_context > *ctx) > * case we opt to forcibly kill off all remaining requests on > * context close. > */ > - if (!i915_modparams.enable_hangcheck) > + if (!i915_gem_context_is_persistent(ctx) || > + !i915_modparams.enable_hangcheck) > kill_context(ctx); > > i915_gem_context_put(ctx); > } > > +static int __context_set_persistence(struct i915_gem_context *ctx, bool > state) > +{ > + if (i915_gem_context_is_persistent(ctx) == state) > + return 0; > + > + if (state) { > + /* > + * Only contexts that are short-lived [that will expire or > be > + * reset] are allowed to survive past termination. We > require > + * hangcheck to ensure that the persistent requests are > healthy. > + */ > + if (!i915_modparams.enable_hangcheck) > + return -EINVAL; > + > + i915_gem_context_set_persistence(ctx); > + } else { > + /* To cancel a context we use "preempt-to-idle" */ > + if (!(ctx->i915->caps.scheduler & > I915_SCHEDULER_CAP_PREEMPTION)) > + return -ENODEV; > + > + i915_gem_context_clear_persistence(ctx); > + } > + > + return 0; > +} > + > static struct i915_gem_context * > __create_context(struct drm_i915_private *i915) > { > @@ -477,6 +504,7 @@ __create_context(struct drm_i915_private *i915) > > i915_gem_context_set_bannable(ctx); > i915_gem_context_set_recoverable(ctx); > + __context_set_persistence(ctx, true /* cgroup hook? */); > > for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++) > ctx->hang_timestamp[i] = jiffies - > CONTEXT_FAST_HANG_JIFFIES; > @@ -633,6 +661,7 @@ i915_gem_context_create_kernel(struct drm_i915_private > *i915, int prio) > return ctx; > > i915_gem_context_clear_bannable(ctx); > + i915_gem_context_set_persistence(ctx); > ctx->sched.priority = I915_USER_PRIORITY(prio); > > GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); > @@ -1743,6 +1772,16 @@ get_engines(struct i915_gem_context *ctx, > return err; > } > > +static int > +set_persistence(struct i915_gem_context *ctx, > + const struct drm_i915_gem_context_param *args) > +{ > + if (args->size) > + return -EINVAL; > + > + return __context_set_persistence(ctx, args->value); > +} > + > static int ctx_setparam(struct drm_i915_file_private *fpriv, > struct i915_gem_context *ctx, > struct drm_i915_gem_context_param *args) > @@ -1820,6 +1859,10 @@ static int ctx_setparam(struct > drm_i915_file_private *fpriv, > ret = set_engines(ctx, args); > break; > > + case I915_CONTEXT_PARAM_PERSISTENCE: > + ret = set_persistence(ctx, args); > + break; > + > case I915_CONTEXT_PARAM_BAN_PERIOD: > default: > ret = -EINVAL; > @@ -2272,6 +2315,11 @@ int i915_gem_context_getparam_ioctl(struct > drm_device *dev, void *data, > ret = get_engines(ctx, args); > break; > > + case I915_CONTEXT_PARAM_PERSISTENCE: > + args->size = 0; > + args->value = i915_gem_context_is_persistent(ctx); > + break; > + > case I915_CONTEXT_PARAM_BAN_PERIOD: > default: > ret = -EINVAL; > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h > b/drivers/gpu/drm/i915/gem/i915_gem_context.h > index cfe80590f0ed..18e50a769a6e 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h > @@ -76,6 +76,21 @@ static inline void > i915_gem_context_clear_recoverable(struct i915_gem_context *c > clear_bit(UCONTEXT_RECOVERABLE, &ctx->user_flags); > } > > +static inline bool i915_gem_context_is_persistent(const struct > i915_gem_context *ctx) > +{ > + return test_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags); > +} > + > +static inline void i915_gem_context_set_persistence(struct > i915_gem_context *ctx) > +{ > + set_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags); > +} > + > +static inline void i915_gem_context_clear_persistence(struct > i915_gem_context *ctx) > +{ > + clear_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags); > +} > + > static inline bool i915_gem_context_is_banned(const struct > i915_gem_context *ctx) > { > return test_bit(CONTEXT_BANNED, &ctx->flags); > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h > b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h > index fe97b8ba4fda..861d7d92fe9f 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h > @@ -137,6 +137,7 @@ struct i915_gem_context { > #define UCONTEXT_NO_ERROR_CAPTURE 1 > #define UCONTEXT_BANNABLE 2 > #define UCONTEXT_RECOVERABLE 3 > +#define UCONTEXT_PERSISTENCE 4 > > /** > * @flags: small set of booleans > diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c > b/drivers/gpu/drm/i915/gem/selftests/mock_context.c > index 74ddd682c9cd..29b8984f0e47 100644 > --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c > +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c > @@ -22,6 +22,8 @@ mock_context(struct drm_i915_private *i915, > INIT_LIST_HEAD(&ctx->link); > ctx->i915 = i915; > > + i915_gem_context_set_persistence(ctx); > + > mutex_init(&ctx->engines_mutex); > e = default_engines(ctx); > if (IS_ERR(e)) > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h > index 63d40cba97e0..b5fd5e4bd16e 100644 > --- a/include/uapi/drm/i915_drm.h > +++ b/include/uapi/drm/i915_drm.h > @@ -1572,6 +1572,21 @@ struct drm_i915_gem_context_param { > * i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND) > */ > #define I915_CONTEXT_PARAM_ENGINES 0xa > + > +/* > + * I915_CONTEXT_PARAM_PERSISTENCE: > + * > + * Allow the context and active rendering to survive the process until > + * completion. Persistence allows fire-and-forget clients to queue up a > + * bunch of work, hand the output over to a display server and the quit. > + * If the context is not marked as persistent, upon closing (either via > + * an explicit DRM_I915_GEM_CONTEXT_DESTROY or implicitly from file > closure > + * or process termination), the context and any outstanding requests will > be > + * cancelled (and exported fences for cancelled requests marked as -EIO). > + * > + * By default, new contexts allow persistence. > + */ > +#define I915_CONTEXT_PARAM_PERSISTENCE 0xb > /* Must be kept compact -- no holes and well documented */ > > __u64 value; > -- > 2.24.0.rc0 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx