Re: [PATCH v2] drm/i915/guc: Use context hints for GT freq

2024-03-04 Thread Rodrigo Vivi
On Wed, Feb 28, 2024 at 11:58:06AM -0800, Belgaumkar, Vinay wrote:
> 
> On 2/28/2024 4:54 AM, Tvrtko Ursulin wrote:
> > 
> > On 27/02/2024 23:51, Vinay Belgaumkar wrote:
> > > Allow user to provide a low latency context hint. When set, KMD
> > > sends a hint to GuC which results in special handling for this
> > > context. SLPC will ramp the GT frequency aggressively every time
> > > it switches to this context. The down freq threshold will also be
> > > lower so GuC will ramp down the GT freq for this context more slowly.
> > > We also disable waitboost for this context as that will interfere with
> > > the strategy.
> > > 
> > > We need to enable the use of SLPC Compute strategy during init, but
> > > it will apply only to contexts that set this bit during context
> > > creation.
> > > 
> > > Userland can check whether this feature is supported using a new param-
> > > I915_PARAM_HAS_CONTEXT_FREQ_HINTS. This flag is true for all guc
> > > submission
> > > enabled platforms as they use SLPC for frequency management.
> > > 
> > > The Mesa usage model for this flag is here -
> > > https://gitlab.freedesktop.org/sushmave/mesa/-/commits/compute_hint
> > > 
> > > v2: Rename flags as per review suggestions (Rodrigo, Tvrtko).
> > > Also, use flag bits in intel_context as it allows finer control for
> > > toggling per engine if needed (Tvrtko).
> > > 
> > > Cc: Rodrigo Vivi 
> > > Cc: Tvrtko Ursulin 
> > > Cc: Sushma Venkatesh Reddy 
> > > Signed-off-by: Vinay Belgaumkar 
> > > ---
> > >   drivers/gpu/drm/i915/gem/i915_gem_context.c   | 15 +++--
> > >   .../gpu/drm/i915/gem/i915_gem_context_types.h |  1 +
> > >   drivers/gpu/drm/i915/gt/intel_context_types.h |  1 +
> > >   drivers/gpu/drm/i915/gt/intel_rps.c   |  5 +
> > >   .../drm/i915/gt/uc/abi/guc_actions_slpc_abi.h | 21 +++
> > >   drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c   | 17 +++
> > >   drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h   |  1 +
> > >   .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  6 ++
> > >   drivers/gpu/drm/i915/i915_getparam.c  | 12 +++
> > >   include/uapi/drm/i915_drm.h   | 15 +
> > >   10 files changed, 92 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > > b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > > index dcbfe32fd30c..0799cb0b2803 100644
> > > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > > @@ -879,6 +879,7 @@ static int set_proto_ctx_param(struct
> > > drm_i915_file_private *fpriv,
> > >  struct i915_gem_proto_context *pc,
> > >  struct drm_i915_gem_context_param *args)
> > >   {
> > > +    struct drm_i915_private *i915 = fpriv->i915;
> > >   int ret = 0;
> > >     switch (args->param) {
> > > @@ -904,6 +905,13 @@ static int set_proto_ctx_param(struct
> > > drm_i915_file_private *fpriv,
> > >   pc->user_flags &= ~BIT(UCONTEXT_BANNABLE);
> > >   break;
> > >   +    case I915_CONTEXT_PARAM_LOW_LATENCY:
> > > +    if (intel_uc_uses_guc_submission(_gt(i915)->uc))
> > > +    pc->user_flags |= BIT(UCONTEXT_LOW_LATENCY);
> > > +    else
> > > +    ret = -EINVAL;
> > > +    break;
> > > +
> > >   case I915_CONTEXT_PARAM_RECOVERABLE:
> > >   if (args->size)
> > >   ret = -EINVAL;
> > > @@ -992,6 +1000,9 @@ static int intel_context_set_gem(struct
> > > intel_context *ce,
> > >   if (sseu.slice_mask && !WARN_ON(ce->engine->class !=
> > > RENDER_CLASS))
> > >   ret = intel_context_reconfigure_sseu(ce, sseu);
> > >   +    if (test_bit(UCONTEXT_LOW_LATENCY, >user_flags))
> > > +    set_bit(CONTEXT_LOW_LATENCY, >flags);
> > 
> > Does not need to be atomic so can use __set_bit as higher up in the
> > function.
> ok.
> > 
> > > +
> > >   return ret;
> > >   }
> > >   @@ -1630,6 +1641,8 @@ i915_gem_create_context(struct
> > > drm_i915_private *i915,
> > >   if (vm)
> > >   ctx->vm = vm;
> > >   +    ctx->user_flags = pc->user_flags;
> > > +
> > 
> > Given how most ctx->something assignments are at the bottom of the
> > function I would stick a comment here saying along the lines of "assign
> > early for intel_context_set_gem called when creating engines".
> ok.
> > 
> > > mutex_init(>engines_mutex);
> > >   if (pc->num_user_engines >= 0) {
> > >   i915_gem_context_set_user_engines(ctx);
> > > @@ -1652,8 +1665,6 @@ i915_gem_create_context(struct
> > > drm_i915_private *i915,
> > >    * is no remap info, it will be a NOP. */
> > >   ctx->remap_slice = ALL_L3_SLICES(i915);
> > >   -    ctx->user_flags = pc->user_flags;
> > > -
> > >   for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
> > >   ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
> > >   diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
> > > 

Re: [PATCH v2] drm/i915/guc: Use context hints for GT freq

2024-02-28 Thread Belgaumkar, Vinay



On 2/28/2024 4:54 AM, Tvrtko Ursulin wrote:


On 27/02/2024 23:51, Vinay Belgaumkar wrote:

Allow user to provide a low latency context hint. When set, KMD
sends a hint to GuC which results in special handling for this
context. SLPC will ramp the GT frequency aggressively every time
it switches to this context. The down freq threshold will also be
lower so GuC will ramp down the GT freq for this context more slowly.
We also disable waitboost for this context as that will interfere with
the strategy.

We need to enable the use of SLPC Compute strategy during init, but
it will apply only to contexts that set this bit during context
creation.

Userland can check whether this feature is supported using a new param-
I915_PARAM_HAS_CONTEXT_FREQ_HINTS. This flag is true for all guc 
submission

enabled platforms as they use SLPC for frequency management.

The Mesa usage model for this flag is here -
https://gitlab.freedesktop.org/sushmave/mesa/-/commits/compute_hint

v2: Rename flags as per review suggestions (Rodrigo, Tvrtko).
Also, use flag bits in intel_context as it allows finer control for
toggling per engine if needed (Tvrtko).

Cc: Rodrigo Vivi 
Cc: Tvrtko Ursulin 
Cc: Sushma Venkatesh Reddy 
Signed-off-by: Vinay Belgaumkar 
---
  drivers/gpu/drm/i915/gem/i915_gem_context.c   | 15 +++--
  .../gpu/drm/i915/gem/i915_gem_context_types.h |  1 +
  drivers/gpu/drm/i915/gt/intel_context_types.h |  1 +
  drivers/gpu/drm/i915/gt/intel_rps.c   |  5 +
  .../drm/i915/gt/uc/abi/guc_actions_slpc_abi.h | 21 +++
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c   | 17 +++
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h   |  1 +
  .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  6 ++
  drivers/gpu/drm/i915/i915_getparam.c  | 12 +++
  include/uapi/drm/i915_drm.h   | 15 +
  10 files changed, 92 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c

index dcbfe32fd30c..0799cb0b2803 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -879,6 +879,7 @@ static int set_proto_ctx_param(struct 
drm_i915_file_private *fpriv,

 struct i915_gem_proto_context *pc,
 struct drm_i915_gem_context_param *args)
  {
+    struct drm_i915_private *i915 = fpriv->i915;
  int ret = 0;
    switch (args->param) {
@@ -904,6 +905,13 @@ static int set_proto_ctx_param(struct 
drm_i915_file_private *fpriv,

  pc->user_flags &= ~BIT(UCONTEXT_BANNABLE);
  break;
  +    case I915_CONTEXT_PARAM_LOW_LATENCY:
+    if (intel_uc_uses_guc_submission(_gt(i915)->uc))
+    pc->user_flags |= BIT(UCONTEXT_LOW_LATENCY);
+    else
+    ret = -EINVAL;
+    break;
+
  case I915_CONTEXT_PARAM_RECOVERABLE:
  if (args->size)
  ret = -EINVAL;
@@ -992,6 +1000,9 @@ static int intel_context_set_gem(struct 
intel_context *ce,
  if (sseu.slice_mask && !WARN_ON(ce->engine->class != 
RENDER_CLASS))

  ret = intel_context_reconfigure_sseu(ce, sseu);
  +    if (test_bit(UCONTEXT_LOW_LATENCY, >user_flags))
+    set_bit(CONTEXT_LOW_LATENCY, >flags);


Does not need to be atomic so can use __set_bit as higher up in the 
function.

ok.



+
  return ret;
  }
  @@ -1630,6 +1641,8 @@ i915_gem_create_context(struct 
drm_i915_private *i915,

  if (vm)
  ctx->vm = vm;
  +    ctx->user_flags = pc->user_flags;
+


Given how most ctx->something assignments are at the bottom of the 
function I would stick a comment here saying along the lines of 
"assign early for intel_context_set_gem called when creating engines".

ok.



mutex_init(>engines_mutex);
  if (pc->num_user_engines >= 0) {
  i915_gem_context_set_user_engines(ctx);
@@ -1652,8 +1665,6 @@ i915_gem_create_context(struct drm_i915_private 
*i915,

   * is no remap info, it will be a NOP. */
  ctx->remap_slice = ALL_L3_SLICES(i915);
  -    ctx->user_flags = pc->user_flags;
-
  for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
  ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
  diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h

index 03bc7f9d191b..b6d97da63d1f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -338,6 +338,7 @@ struct i915_gem_context {
  #define UCONTEXT_BANNABLE    2
  #define UCONTEXT_RECOVERABLE    3
  #define UCONTEXT_PERSISTENCE    4
+#define UCONTEXT_LOW_LATENCY    5
    /**
   * @flags: small set of booleans
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
b/drivers/gpu/drm/i915/gt/intel_context_types.h

index 7eccbd70d89f..ed95a7b57cbb 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ 

Re: [PATCH v2] drm/i915/guc: Use context hints for GT freq

2024-02-28 Thread Tvrtko Ursulin



On 27/02/2024 23:51, Vinay Belgaumkar wrote:

Allow user to provide a low latency context hint. When set, KMD
sends a hint to GuC which results in special handling for this
context. SLPC will ramp the GT frequency aggressively every time
it switches to this context. The down freq threshold will also be
lower so GuC will ramp down the GT freq for this context more slowly.
We also disable waitboost for this context as that will interfere with
the strategy.

We need to enable the use of SLPC Compute strategy during init, but
it will apply only to contexts that set this bit during context
creation.

Userland can check whether this feature is supported using a new param-
I915_PARAM_HAS_CONTEXT_FREQ_HINTS. This flag is true for all guc submission
enabled platforms as they use SLPC for frequency management.

The Mesa usage model for this flag is here -
https://gitlab.freedesktop.org/sushmave/mesa/-/commits/compute_hint

v2: Rename flags as per review suggestions (Rodrigo, Tvrtko).
Also, use flag bits in intel_context as it allows finer control for
toggling per engine if needed (Tvrtko).

Cc: Rodrigo Vivi 
Cc: Tvrtko Ursulin 
Cc: Sushma Venkatesh Reddy 
Signed-off-by: Vinay Belgaumkar 
---
  drivers/gpu/drm/i915/gem/i915_gem_context.c   | 15 +++--
  .../gpu/drm/i915/gem/i915_gem_context_types.h |  1 +
  drivers/gpu/drm/i915/gt/intel_context_types.h |  1 +
  drivers/gpu/drm/i915/gt/intel_rps.c   |  5 +
  .../drm/i915/gt/uc/abi/guc_actions_slpc_abi.h | 21 +++
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c   | 17 +++
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h   |  1 +
  .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  6 ++
  drivers/gpu/drm/i915/i915_getparam.c  | 12 +++
  include/uapi/drm/i915_drm.h   | 15 +
  10 files changed, 92 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index dcbfe32fd30c..0799cb0b2803 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -879,6 +879,7 @@ static int set_proto_ctx_param(struct drm_i915_file_private 
*fpriv,
   struct i915_gem_proto_context *pc,
   struct drm_i915_gem_context_param *args)
  {
+   struct drm_i915_private *i915 = fpriv->i915;
int ret = 0;
  
  	switch (args->param) {

@@ -904,6 +905,13 @@ static int set_proto_ctx_param(struct 
drm_i915_file_private *fpriv,
pc->user_flags &= ~BIT(UCONTEXT_BANNABLE);
break;
  
+	case I915_CONTEXT_PARAM_LOW_LATENCY:

+   if (intel_uc_uses_guc_submission(_gt(i915)->uc))
+   pc->user_flags |= BIT(UCONTEXT_LOW_LATENCY);
+   else
+   ret = -EINVAL;
+   break;
+
case I915_CONTEXT_PARAM_RECOVERABLE:
if (args->size)
ret = -EINVAL;
@@ -992,6 +1000,9 @@ static int intel_context_set_gem(struct intel_context *ce,
if (sseu.slice_mask && !WARN_ON(ce->engine->class != RENDER_CLASS))
ret = intel_context_reconfigure_sseu(ce, sseu);
  
+	if (test_bit(UCONTEXT_LOW_LATENCY, >user_flags))

+   set_bit(CONTEXT_LOW_LATENCY, >flags);


Does not need to be atomic so can use __set_bit as higher up in the 
function.



+
return ret;
  }
  
@@ -1630,6 +1641,8 @@ i915_gem_create_context(struct drm_i915_private *i915,

if (vm)
ctx->vm = vm;
  
+	ctx->user_flags = pc->user_flags;

+


Given how most ctx->something assignments are at the bottom of the 
function I would stick a comment here saying along the lines of "assign 
early for intel_context_set_gem called when creating engines".



mutex_init(>engines_mutex);
if (pc->num_user_engines >= 0) {
i915_gem_context_set_user_engines(ctx);
@@ -1652,8 +1665,6 @@ i915_gem_create_context(struct drm_i915_private *i915,
 * is no remap info, it will be a NOP. */
ctx->remap_slice = ALL_L3_SLICES(i915);
  
-	ctx->user_flags = pc->user_flags;

-
for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
  
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h

index 03bc7f9d191b..b6d97da63d1f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -338,6 +338,7 @@ struct i915_gem_context {
  #define UCONTEXT_BANNABLE 2
  #define UCONTEXT_RECOVERABLE  3
  #define UCONTEXT_PERSISTENCE  4
+#define UCONTEXT_LOW_LATENCY   5
  
  	/**

 * @flags: small set of booleans
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 

[PATCH v2] drm/i915/guc: Use context hints for GT freq

2024-02-27 Thread Vinay Belgaumkar
Allow user to provide a low latency context hint. When set, KMD
sends a hint to GuC which results in special handling for this
context. SLPC will ramp the GT frequency aggressively every time
it switches to this context. The down freq threshold will also be
lower so GuC will ramp down the GT freq for this context more slowly.
We also disable waitboost for this context as that will interfere with
the strategy.

We need to enable the use of SLPC Compute strategy during init, but
it will apply only to contexts that set this bit during context
creation.

Userland can check whether this feature is supported using a new param-
I915_PARAM_HAS_CONTEXT_FREQ_HINTS. This flag is true for all guc submission
enabled platforms as they use SLPC for frequency management.

The Mesa usage model for this flag is here -
https://gitlab.freedesktop.org/sushmave/mesa/-/commits/compute_hint

v2: Rename flags as per review suggestions (Rodrigo, Tvrtko).
Also, use flag bits in intel_context as it allows finer control for
toggling per engine if needed (Tvrtko).

Cc: Rodrigo Vivi 
Cc: Tvrtko Ursulin 
Cc: Sushma Venkatesh Reddy 
Signed-off-by: Vinay Belgaumkar 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 15 +++--
 .../gpu/drm/i915/gem/i915_gem_context_types.h |  1 +
 drivers/gpu/drm/i915/gt/intel_context_types.h |  1 +
 drivers/gpu/drm/i915/gt/intel_rps.c   |  5 +
 .../drm/i915/gt/uc/abi/guc_actions_slpc_abi.h | 21 +++
 drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c   | 17 +++
 drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h   |  1 +
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  6 ++
 drivers/gpu/drm/i915/i915_getparam.c  | 12 +++
 include/uapi/drm/i915_drm.h   | 15 +
 10 files changed, 92 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index dcbfe32fd30c..0799cb0b2803 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -879,6 +879,7 @@ static int set_proto_ctx_param(struct drm_i915_file_private 
*fpriv,
   struct i915_gem_proto_context *pc,
   struct drm_i915_gem_context_param *args)
 {
+   struct drm_i915_private *i915 = fpriv->i915;
int ret = 0;
 
switch (args->param) {
@@ -904,6 +905,13 @@ static int set_proto_ctx_param(struct 
drm_i915_file_private *fpriv,
pc->user_flags &= ~BIT(UCONTEXT_BANNABLE);
break;
 
+   case I915_CONTEXT_PARAM_LOW_LATENCY:
+   if (intel_uc_uses_guc_submission(_gt(i915)->uc))
+   pc->user_flags |= BIT(UCONTEXT_LOW_LATENCY);
+   else
+   ret = -EINVAL;
+   break;
+
case I915_CONTEXT_PARAM_RECOVERABLE:
if (args->size)
ret = -EINVAL;
@@ -992,6 +1000,9 @@ static int intel_context_set_gem(struct intel_context *ce,
if (sseu.slice_mask && !WARN_ON(ce->engine->class != RENDER_CLASS))
ret = intel_context_reconfigure_sseu(ce, sseu);
 
+   if (test_bit(UCONTEXT_LOW_LATENCY, >user_flags))
+   set_bit(CONTEXT_LOW_LATENCY, >flags);
+
return ret;
 }
 
@@ -1630,6 +1641,8 @@ i915_gem_create_context(struct drm_i915_private *i915,
if (vm)
ctx->vm = vm;
 
+   ctx->user_flags = pc->user_flags;
+
mutex_init(>engines_mutex);
if (pc->num_user_engines >= 0) {
i915_gem_context_set_user_engines(ctx);
@@ -1652,8 +1665,6 @@ i915_gem_create_context(struct drm_i915_private *i915,
 * is no remap info, it will be a NOP. */
ctx->remap_slice = ALL_L3_SLICES(i915);
 
-   ctx->user_flags = pc->user_flags;
-
for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index 03bc7f9d191b..b6d97da63d1f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -338,6 +338,7 @@ struct i915_gem_context {
 #define UCONTEXT_BANNABLE  2
 #define UCONTEXT_RECOVERABLE   3
 #define UCONTEXT_PERSISTENCE   4
+#define UCONTEXT_LOW_LATENCY   5
 
/**
 * @flags: small set of booleans
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 7eccbd70d89f..ed95a7b57cbb 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -130,6 +130,7 @@ struct intel_context {
 #define CONTEXT_PERMA_PIN  11
 #define CONTEXT_IS_PARKING 12
 #define CONTEXT_EXITING13
+#define CONTEXT_LOW_LATENCY