Re: [PATCH v2] drm/i915/guc: Use context hints for GT freq
On Wed, Feb 28, 2024 at 11:58:06AM -0800, Belgaumkar, Vinay wrote: > > On 2/28/2024 4:54 AM, Tvrtko Ursulin wrote: > > > > On 27/02/2024 23:51, Vinay Belgaumkar wrote: > > > Allow user to provide a low latency context hint. When set, KMD > > > sends a hint to GuC which results in special handling for this > > > context. SLPC will ramp the GT frequency aggressively every time > > > it switches to this context. The down freq threshold will also be > > > lower so GuC will ramp down the GT freq for this context more slowly. > > > We also disable waitboost for this context as that will interfere with > > > the strategy. > > > > > > We need to enable the use of SLPC Compute strategy during init, but > > > it will apply only to contexts that set this bit during context > > > creation. > > > > > > Userland can check whether this feature is supported using a new param- > > > I915_PARAM_HAS_CONTEXT_FREQ_HINTS. This flag is true for all guc > > > submission > > > enabled platforms as they use SLPC for frequency management. > > > > > > The Mesa usage model for this flag is here - > > > https://gitlab.freedesktop.org/sushmave/mesa/-/commits/compute_hint > > > > > > v2: Rename flags as per review suggestions (Rodrigo, Tvrtko). > > > Also, use flag bits in intel_context as it allows finer control for > > > toggling per engine if needed (Tvrtko). > > > > > > Cc: Rodrigo Vivi > > > Cc: Tvrtko Ursulin > > > Cc: Sushma Venkatesh Reddy > > > Signed-off-by: Vinay Belgaumkar > > > --- > > > drivers/gpu/drm/i915/gem/i915_gem_context.c | 15 +++-- > > > .../gpu/drm/i915/gem/i915_gem_context_types.h | 1 + > > > drivers/gpu/drm/i915/gt/intel_context_types.h | 1 + > > > drivers/gpu/drm/i915/gt/intel_rps.c | 5 + > > > .../drm/i915/gt/uc/abi/guc_actions_slpc_abi.h | 21 +++ > > > drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 17 +++ > > > drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h | 1 + > > > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 6 ++ > > > drivers/gpu/drm/i915/i915_getparam.c | 12 +++ > > > include/uapi/drm/i915_drm.h | 15 + > > > 10 files changed, 92 insertions(+), 2 deletions(-) > > > > > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c > > > b/drivers/gpu/drm/i915/gem/i915_gem_context.c > > > index dcbfe32fd30c..0799cb0b2803 100644 > > > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c > > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c > > > @@ -879,6 +879,7 @@ static int set_proto_ctx_param(struct > > > drm_i915_file_private *fpriv, > > > struct i915_gem_proto_context *pc, > > > struct drm_i915_gem_context_param *args) > > > { > > > + struct drm_i915_private *i915 = fpriv->i915; > > > int ret = 0; > > > switch (args->param) { > > > @@ -904,6 +905,13 @@ static int set_proto_ctx_param(struct > > > drm_i915_file_private *fpriv, > > > pc->user_flags &= ~BIT(UCONTEXT_BANNABLE); > > > break; > > > + case I915_CONTEXT_PARAM_LOW_LATENCY: > > > + if (intel_uc_uses_guc_submission(_gt(i915)->uc)) > > > + pc->user_flags |= BIT(UCONTEXT_LOW_LATENCY); > > > + else > > > + ret = -EINVAL; > > > + break; > > > + > > > case I915_CONTEXT_PARAM_RECOVERABLE: > > > if (args->size) > > > ret = -EINVAL; > > > @@ -992,6 +1000,9 @@ static int intel_context_set_gem(struct > > > intel_context *ce, > > > if (sseu.slice_mask && !WARN_ON(ce->engine->class != > > > RENDER_CLASS)) > > > ret = intel_context_reconfigure_sseu(ce, sseu); > > > + if (test_bit(UCONTEXT_LOW_LATENCY, >user_flags)) > > > + set_bit(CONTEXT_LOW_LATENCY, >flags); > > > > Does not need to be atomic so can use __set_bit as higher up in the > > function. > ok. > > > > > + > > > return ret; > > > } > > > @@ -1630,6 +1641,8 @@ i915_gem_create_context(struct > > > drm_i915_private *i915, > > > if (vm) > > > ctx->vm = vm; > > > + ctx->user_flags = pc->user_flags; > > > + > > > > Given how most ctx->something assignments are at the bottom of the > > function I would stick a comment here saying along the lines of "assign > > early for intel_context_set_gem called when creating engines". > ok. > > > > > mutex_init(>engines_mutex); > > > if (pc->num_user_engines >= 0) { > > > i915_gem_context_set_user_engines(ctx); > > > @@ -1652,8 +1665,6 @@ i915_gem_create_context(struct > > > drm_i915_private *i915, > > > * is no remap info, it will be a NOP. */ > > > ctx->remap_slice = ALL_L3_SLICES(i915); > > > - ctx->user_flags = pc->user_flags; > > > - > > > for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++) > > > ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; > > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h > > >
Re: [PATCH v2] drm/i915/guc: Use context hints for GT freq
On 2/28/2024 4:54 AM, Tvrtko Ursulin wrote: On 27/02/2024 23:51, Vinay Belgaumkar wrote: Allow user to provide a low latency context hint. When set, KMD sends a hint to GuC which results in special handling for this context. SLPC will ramp the GT frequency aggressively every time it switches to this context. The down freq threshold will also be lower so GuC will ramp down the GT freq for this context more slowly. We also disable waitboost for this context as that will interfere with the strategy. We need to enable the use of SLPC Compute strategy during init, but it will apply only to contexts that set this bit during context creation. Userland can check whether this feature is supported using a new param- I915_PARAM_HAS_CONTEXT_FREQ_HINTS. This flag is true for all guc submission enabled platforms as they use SLPC for frequency management. The Mesa usage model for this flag is here - https://gitlab.freedesktop.org/sushmave/mesa/-/commits/compute_hint v2: Rename flags as per review suggestions (Rodrigo, Tvrtko). Also, use flag bits in intel_context as it allows finer control for toggling per engine if needed (Tvrtko). Cc: Rodrigo Vivi Cc: Tvrtko Ursulin Cc: Sushma Venkatesh Reddy Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 15 +++-- .../gpu/drm/i915/gem/i915_gem_context_types.h | 1 + drivers/gpu/drm/i915/gt/intel_context_types.h | 1 + drivers/gpu/drm/i915/gt/intel_rps.c | 5 + .../drm/i915/gt/uc/abi/guc_actions_slpc_abi.h | 21 +++ drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 17 +++ drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h | 1 + .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 6 ++ drivers/gpu/drm/i915/i915_getparam.c | 12 +++ include/uapi/drm/i915_drm.h | 15 + 10 files changed, 92 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index dcbfe32fd30c..0799cb0b2803 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -879,6 +879,7 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv, struct i915_gem_proto_context *pc, struct drm_i915_gem_context_param *args) { + struct drm_i915_private *i915 = fpriv->i915; int ret = 0; switch (args->param) { @@ -904,6 +905,13 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv, pc->user_flags &= ~BIT(UCONTEXT_BANNABLE); break; + case I915_CONTEXT_PARAM_LOW_LATENCY: + if (intel_uc_uses_guc_submission(_gt(i915)->uc)) + pc->user_flags |= BIT(UCONTEXT_LOW_LATENCY); + else + ret = -EINVAL; + break; + case I915_CONTEXT_PARAM_RECOVERABLE: if (args->size) ret = -EINVAL; @@ -992,6 +1000,9 @@ static int intel_context_set_gem(struct intel_context *ce, if (sseu.slice_mask && !WARN_ON(ce->engine->class != RENDER_CLASS)) ret = intel_context_reconfigure_sseu(ce, sseu); + if (test_bit(UCONTEXT_LOW_LATENCY, >user_flags)) + set_bit(CONTEXT_LOW_LATENCY, >flags); Does not need to be atomic so can use __set_bit as higher up in the function. ok. + return ret; } @@ -1630,6 +1641,8 @@ i915_gem_create_context(struct drm_i915_private *i915, if (vm) ctx->vm = vm; + ctx->user_flags = pc->user_flags; + Given how most ctx->something assignments are at the bottom of the function I would stick a comment here saying along the lines of "assign early for intel_context_set_gem called when creating engines". ok. mutex_init(>engines_mutex); if (pc->num_user_engines >= 0) { i915_gem_context_set_user_engines(ctx); @@ -1652,8 +1665,6 @@ i915_gem_create_context(struct drm_i915_private *i915, * is no remap info, it will be a NOP. */ ctx->remap_slice = ALL_L3_SLICES(i915); - ctx->user_flags = pc->user_flags; - for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++) ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 03bc7f9d191b..b6d97da63d1f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -338,6 +338,7 @@ struct i915_gem_context { #define UCONTEXT_BANNABLE 2 #define UCONTEXT_RECOVERABLE 3 #define UCONTEXT_PERSISTENCE 4 +#define UCONTEXT_LOW_LATENCY 5 /** * @flags: small set of booleans diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 7eccbd70d89f..ed95a7b57cbb 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++
Re: [PATCH v2] drm/i915/guc: Use context hints for GT freq
On 27/02/2024 23:51, Vinay Belgaumkar wrote: Allow user to provide a low latency context hint. When set, KMD sends a hint to GuC which results in special handling for this context. SLPC will ramp the GT frequency aggressively every time it switches to this context. The down freq threshold will also be lower so GuC will ramp down the GT freq for this context more slowly. We also disable waitboost for this context as that will interfere with the strategy. We need to enable the use of SLPC Compute strategy during init, but it will apply only to contexts that set this bit during context creation. Userland can check whether this feature is supported using a new param- I915_PARAM_HAS_CONTEXT_FREQ_HINTS. This flag is true for all guc submission enabled platforms as they use SLPC for frequency management. The Mesa usage model for this flag is here - https://gitlab.freedesktop.org/sushmave/mesa/-/commits/compute_hint v2: Rename flags as per review suggestions (Rodrigo, Tvrtko). Also, use flag bits in intel_context as it allows finer control for toggling per engine if needed (Tvrtko). Cc: Rodrigo Vivi Cc: Tvrtko Ursulin Cc: Sushma Venkatesh Reddy Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 15 +++-- .../gpu/drm/i915/gem/i915_gem_context_types.h | 1 + drivers/gpu/drm/i915/gt/intel_context_types.h | 1 + drivers/gpu/drm/i915/gt/intel_rps.c | 5 + .../drm/i915/gt/uc/abi/guc_actions_slpc_abi.h | 21 +++ drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 17 +++ drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h | 1 + .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 6 ++ drivers/gpu/drm/i915/i915_getparam.c | 12 +++ include/uapi/drm/i915_drm.h | 15 + 10 files changed, 92 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index dcbfe32fd30c..0799cb0b2803 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -879,6 +879,7 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv, struct i915_gem_proto_context *pc, struct drm_i915_gem_context_param *args) { + struct drm_i915_private *i915 = fpriv->i915; int ret = 0; switch (args->param) { @@ -904,6 +905,13 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv, pc->user_flags &= ~BIT(UCONTEXT_BANNABLE); break; + case I915_CONTEXT_PARAM_LOW_LATENCY: + if (intel_uc_uses_guc_submission(_gt(i915)->uc)) + pc->user_flags |= BIT(UCONTEXT_LOW_LATENCY); + else + ret = -EINVAL; + break; + case I915_CONTEXT_PARAM_RECOVERABLE: if (args->size) ret = -EINVAL; @@ -992,6 +1000,9 @@ static int intel_context_set_gem(struct intel_context *ce, if (sseu.slice_mask && !WARN_ON(ce->engine->class != RENDER_CLASS)) ret = intel_context_reconfigure_sseu(ce, sseu); + if (test_bit(UCONTEXT_LOW_LATENCY, >user_flags)) + set_bit(CONTEXT_LOW_LATENCY, >flags); Does not need to be atomic so can use __set_bit as higher up in the function. + return ret; } @@ -1630,6 +1641,8 @@ i915_gem_create_context(struct drm_i915_private *i915, if (vm) ctx->vm = vm; + ctx->user_flags = pc->user_flags; + Given how most ctx->something assignments are at the bottom of the function I would stick a comment here saying along the lines of "assign early for intel_context_set_gem called when creating engines". mutex_init(>engines_mutex); if (pc->num_user_engines >= 0) { i915_gem_context_set_user_engines(ctx); @@ -1652,8 +1665,6 @@ i915_gem_create_context(struct drm_i915_private *i915, * is no remap info, it will be a NOP. */ ctx->remap_slice = ALL_L3_SLICES(i915); - ctx->user_flags = pc->user_flags; - for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++) ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 03bc7f9d191b..b6d97da63d1f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -338,6 +338,7 @@ struct i915_gem_context { #define UCONTEXT_BANNABLE 2 #define UCONTEXT_RECOVERABLE 3 #define UCONTEXT_PERSISTENCE 4 +#define UCONTEXT_LOW_LATENCY 5 /** * @flags: small set of booleans diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index
[PATCH v2] drm/i915/guc: Use context hints for GT freq
Allow user to provide a low latency context hint. When set, KMD sends a hint to GuC which results in special handling for this context. SLPC will ramp the GT frequency aggressively every time it switches to this context. The down freq threshold will also be lower so GuC will ramp down the GT freq for this context more slowly. We also disable waitboost for this context as that will interfere with the strategy. We need to enable the use of SLPC Compute strategy during init, but it will apply only to contexts that set this bit during context creation. Userland can check whether this feature is supported using a new param- I915_PARAM_HAS_CONTEXT_FREQ_HINTS. This flag is true for all guc submission enabled platforms as they use SLPC for frequency management. The Mesa usage model for this flag is here - https://gitlab.freedesktop.org/sushmave/mesa/-/commits/compute_hint v2: Rename flags as per review suggestions (Rodrigo, Tvrtko). Also, use flag bits in intel_context as it allows finer control for toggling per engine if needed (Tvrtko). Cc: Rodrigo Vivi Cc: Tvrtko Ursulin Cc: Sushma Venkatesh Reddy Signed-off-by: Vinay Belgaumkar --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 15 +++-- .../gpu/drm/i915/gem/i915_gem_context_types.h | 1 + drivers/gpu/drm/i915/gt/intel_context_types.h | 1 + drivers/gpu/drm/i915/gt/intel_rps.c | 5 + .../drm/i915/gt/uc/abi/guc_actions_slpc_abi.h | 21 +++ drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 17 +++ drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h | 1 + .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 6 ++ drivers/gpu/drm/i915/i915_getparam.c | 12 +++ include/uapi/drm/i915_drm.h | 15 + 10 files changed, 92 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index dcbfe32fd30c..0799cb0b2803 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -879,6 +879,7 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv, struct i915_gem_proto_context *pc, struct drm_i915_gem_context_param *args) { + struct drm_i915_private *i915 = fpriv->i915; int ret = 0; switch (args->param) { @@ -904,6 +905,13 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv, pc->user_flags &= ~BIT(UCONTEXT_BANNABLE); break; + case I915_CONTEXT_PARAM_LOW_LATENCY: + if (intel_uc_uses_guc_submission(_gt(i915)->uc)) + pc->user_flags |= BIT(UCONTEXT_LOW_LATENCY); + else + ret = -EINVAL; + break; + case I915_CONTEXT_PARAM_RECOVERABLE: if (args->size) ret = -EINVAL; @@ -992,6 +1000,9 @@ static int intel_context_set_gem(struct intel_context *ce, if (sseu.slice_mask && !WARN_ON(ce->engine->class != RENDER_CLASS)) ret = intel_context_reconfigure_sseu(ce, sseu); + if (test_bit(UCONTEXT_LOW_LATENCY, >user_flags)) + set_bit(CONTEXT_LOW_LATENCY, >flags); + return ret; } @@ -1630,6 +1641,8 @@ i915_gem_create_context(struct drm_i915_private *i915, if (vm) ctx->vm = vm; + ctx->user_flags = pc->user_flags; + mutex_init(>engines_mutex); if (pc->num_user_engines >= 0) { i915_gem_context_set_user_engines(ctx); @@ -1652,8 +1665,6 @@ i915_gem_create_context(struct drm_i915_private *i915, * is no remap info, it will be a NOP. */ ctx->remap_slice = ALL_L3_SLICES(i915); - ctx->user_flags = pc->user_flags; - for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++) ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 03bc7f9d191b..b6d97da63d1f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -338,6 +338,7 @@ struct i915_gem_context { #define UCONTEXT_BANNABLE 2 #define UCONTEXT_RECOVERABLE 3 #define UCONTEXT_PERSISTENCE 4 +#define UCONTEXT_LOW_LATENCY 5 /** * @flags: small set of booleans diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 7eccbd70d89f..ed95a7b57cbb 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -130,6 +130,7 @@ struct intel_context { #define CONTEXT_PERMA_PIN 11 #define CONTEXT_IS_PARKING 12 #define CONTEXT_EXITING13 +#define CONTEXT_LOW_LATENCY