Re: [PATCH] drm/i915/guc: Add Compute context hint

2024-02-26 Thread Tvrtko Ursulin



On 26/02/2024 08:47, Tvrtko Ursulin wrote:


On 23/02/2024 19:25, Rodrigo Vivi wrote:

On Fri, Feb 23, 2024 at 10:31:41AM -0800, Belgaumkar, Vinay wrote:


On 2/23/2024 12:51 AM, Tvrtko Ursulin wrote:


On 22/02/2024 23:31, Belgaumkar, Vinay wrote:


On 2/22/2024 7:32 AM, Tvrtko Ursulin wrote:


On 21/02/2024 21:28, Rodrigo Vivi wrote:

On Wed, Feb 21, 2024 at 09:42:34AM +, Tvrtko Ursulin wrote:


On 21/02/2024 00:14, Vinay Belgaumkar wrote:

Allow user to provide a context hint. When this is set, KMD will
send a hint to GuC which results in special handling for this
context. SLPC will ramp the GT frequency aggressively every time
it switches to this context. The down freq threshold will also be
lower so GuC will ramp down the GT freq for this
context more slowly.
We also disable waitboost for this context as that
will interfere with
the strategy.

We need to enable the use of Compute strategy during SLPC init, but
it will apply only to contexts that set this bit during context
creation.

Userland can check whether this feature is supported
using a new param-
I915_PARAM_HAS_COMPUTE_CONTEXT. This flag is true
for all guc submission
enabled platforms since they use SLPC for freq management.

The Mesa usage model for this flag is here -
https://gitlab.freedesktop.org/sushmave/mesa/-/commits/compute_hint


This allows for setting it for the whole application,
correct? Upsides,
downsides? Are there any plans for per context?


Currently there's no extension on a high level API
(Vulkan/OpenGL/OpenCL/etc)
that would allow the application to hint for
power/freq/latency. So Mesa cannot
decide when to hint. So their solution was to use .drirc and
make per-application
decision.

I would prefer a high level extension for a more granular
and informative
decision. We need to work with that goal, but for now I don't see any
cons on this approach.


In principle yeah, it doesn't harm to have the option. I am just
not sure how useful this intermediate step is with its lack
of intra-process granularity.


Cc: Rodrigo Vivi 
Signed-off-by: Vinay Belgaumkar 
---
    drivers/gpu/drm/i915/gem/i915_gem_context.c   |  8 +++
    .../gpu/drm/i915/gem/i915_gem_context_types.h |  1 +
    drivers/gpu/drm/i915/gt/intel_rps.c   |  8 +++
    .../drm/i915/gt/uc/abi/guc_actions_slpc_abi.h | 21 +++
    drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c   | 17 +++
    drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h   |  1 +
    .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  7 +++
    drivers/gpu/drm/i915/i915_getparam.c  | 11 ++
    include/uapi/drm/i915_drm.h   | 15 +
    9 files changed, 89 insertions(+)

diff --git
a/drivers/gpu/drm/i915/gem/i915_gem_context.c
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index dcbfe32fd30c..ceab7dbe9b47 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -879,6 +879,7 @@ static int
set_proto_ctx_param(struct drm_i915_file_private
*fpriv,
   struct i915_gem_proto_context *pc,
   struct drm_i915_gem_context_param *args)
    {
+    struct drm_i915_private *i915 = fpriv->i915;
    int ret = 0;
    switch (args->param) {
@@ -904,6 +905,13 @@ static int
set_proto_ctx_param(struct drm_i915_file_private
*fpriv,
    pc->user_flags &= ~BIT(UCONTEXT_BANNABLE);
    break;
+    case I915_CONTEXT_PARAM_IS_COMPUTE:
+    if (!intel_uc_uses_guc_submission(&to_gt(i915)->uc))
+    ret = -EINVAL;
+    else
+    pc->user_flags |= BIT(UCONTEXT_COMPUTE);
+    break;
+
    case I915_CONTEXT_PARAM_RECOVERABLE:
    if (args->size)
    ret = -EINVAL;
diff --git
a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index 03bc7f9d191b..db86d6f6245f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -338,6 +338,7 @@ struct i915_gem_context {
    #define UCONTEXT_BANNABLE    2
    #define UCONTEXT_RECOVERABLE    3
    #define UCONTEXT_PERSISTENCE    4
+#define UCONTEXT_COMPUTE    5


What is the GuC behaviour when
SLPC_CTX_FREQ_REQ_IS_COMPUTE is set for
non-compute engines? Wondering if per intel_context is
what we want instead.
(Which could then be the i915_context_param_engines extension to mark
individual contexts as compute strategy.)


Perhaps we should rename this? This is a freq-decision-strategy inside
GuC that is there mostly targeting compute workloads that need lower
latency with short burst execution. But the engine itself doesn't matter.
It can be applied to any engine.


I have no idea if it makes sense for other engines, such as
video, and what would be pros and cons in terms of PnP. But in
the case we end up allowing it on any engine, then at least
userspace name shouldn't be compute. :)

Yes, one of the suggestions from Daniele was to have something along the
lines of UCONTEXT_HIFREQ or something along those lines so we don't
confuse it with the Compute Engine.

Re: [PATCH] drm/i915/guc: Add Compute context hint

2024-02-26 Thread Tvrtko Ursulin



On 23/02/2024 19:25, Rodrigo Vivi wrote:

On Fri, Feb 23, 2024 at 10:31:41AM -0800, Belgaumkar, Vinay wrote:


On 2/23/2024 12:51 AM, Tvrtko Ursulin wrote:


On 22/02/2024 23:31, Belgaumkar, Vinay wrote:


On 2/22/2024 7:32 AM, Tvrtko Ursulin wrote:


On 21/02/2024 21:28, Rodrigo Vivi wrote:

On Wed, Feb 21, 2024 at 09:42:34AM +, Tvrtko Ursulin wrote:


On 21/02/2024 00:14, Vinay Belgaumkar wrote:

Allow user to provide a context hint. When this is set, KMD will
send a hint to GuC which results in special handling for this
context. SLPC will ramp the GT frequency aggressively every time
it switches to this context. The down freq threshold will also be
lower so GuC will ramp down the GT freq for this
context more slowly.
We also disable waitboost for this context as that
will interfere with
the strategy.

We need to enable the use of Compute strategy during SLPC init, but
it will apply only to contexts that set this bit during context
creation.

Userland can check whether this feature is supported
using a new param-
I915_PARAM_HAS_COMPUTE_CONTEXT. This flag is true
for all guc submission
enabled platforms since they use SLPC for freq management.

The Mesa usage model for this flag is here -
https://gitlab.freedesktop.org/sushmave/mesa/-/commits/compute_hint


This allows for setting it for the whole application,
correct? Upsides,
downsides? Are there any plans for per context?


Currently there's no extension on a high level API
(Vulkan/OpenGL/OpenCL/etc)
that would allow the application to hint for
power/freq/latency. So Mesa cannot
decide when to hint. So their solution was to use .drirc and
make per-application
decision.

I would prefer a high level extension for a more granular
and informative
decision. We need to work with that goal, but for now I don't see any
cons on this approach.


In principle yeah, it doesn't harm to have the option. I am just
not sure how useful this intermediate step is with its lack
of intra-process granularity.


Cc: Rodrigo Vivi 
Signed-off-by: Vinay Belgaumkar 
---
    drivers/gpu/drm/i915/gem/i915_gem_context.c   |  8 +++
    .../gpu/drm/i915/gem/i915_gem_context_types.h |  1 +
    drivers/gpu/drm/i915/gt/intel_rps.c   |  8 +++
    .../drm/i915/gt/uc/abi/guc_actions_slpc_abi.h | 21 +++
    drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c   | 17 +++
    drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h   |  1 +
    .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  7 +++
    drivers/gpu/drm/i915/i915_getparam.c  | 11 ++
    include/uapi/drm/i915_drm.h   | 15 +
    9 files changed, 89 insertions(+)

diff --git
a/drivers/gpu/drm/i915/gem/i915_gem_context.c
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index dcbfe32fd30c..ceab7dbe9b47 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -879,6 +879,7 @@ static int
set_proto_ctx_param(struct drm_i915_file_private
*fpriv,
   struct i915_gem_proto_context *pc,
   struct drm_i915_gem_context_param *args)
    {
+    struct drm_i915_private *i915 = fpriv->i915;
    int ret = 0;
    switch (args->param) {
@@ -904,6 +905,13 @@ static int
set_proto_ctx_param(struct drm_i915_file_private
*fpriv,
    pc->user_flags &= ~BIT(UCONTEXT_BANNABLE);
    break;
+    case I915_CONTEXT_PARAM_IS_COMPUTE:
+    if (!intel_uc_uses_guc_submission(&to_gt(i915)->uc))
+    ret = -EINVAL;
+    else
+    pc->user_flags |= BIT(UCONTEXT_COMPUTE);
+    break;
+
    case I915_CONTEXT_PARAM_RECOVERABLE:
    if (args->size)
    ret = -EINVAL;
diff --git
a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index 03bc7f9d191b..db86d6f6245f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -338,6 +338,7 @@ struct i915_gem_context {
    #define UCONTEXT_BANNABLE    2
    #define UCONTEXT_RECOVERABLE    3
    #define UCONTEXT_PERSISTENCE    4
+#define UCONTEXT_COMPUTE    5


What is the GuC behaviour when
SLPC_CTX_FREQ_REQ_IS_COMPUTE is set for
non-compute engines? Wondering if per intel_context is
what we want instead.
(Which could then be the i915_context_param_engines extension to mark
individual contexts as compute strategy.)


Perhaps we should rename this? This is a freq-decision-strategy inside
GuC that is there mostly targeting compute workloads that need lower
latency with short burst execution. But the engine itself
doesn't matter.
It can be applied to any engine.


I have no idea if it makes sense for other engines, such as
video, and what would be pros and cons in terms of PnP. But in
the case we end up allowing it on any engine, then at least
userspace name shouldn't be compute. :)

Yes, one of the suggestions from Daniele was to have something along
the lines of UCONTEXT_HIFREQ or something along those lines so we don't
confuse it with the Compute Engine.

Re: [PATCH] drm/i915/guc: Add Compute context hint

2024-02-23 Thread Rodrigo Vivi
On Fri, Feb 23, 2024 at 10:31:41AM -0800, Belgaumkar, Vinay wrote:
> 
> On 2/23/2024 12:51 AM, Tvrtko Ursulin wrote:
> > 
> > On 22/02/2024 23:31, Belgaumkar, Vinay wrote:
> > > 
> > > On 2/22/2024 7:32 AM, Tvrtko Ursulin wrote:
> > > > 
> > > > On 21/02/2024 21:28, Rodrigo Vivi wrote:
> > > > > On Wed, Feb 21, 2024 at 09:42:34AM +, Tvrtko Ursulin wrote:
> > > > > > 
> > > > > > On 21/02/2024 00:14, Vinay Belgaumkar wrote:
> > > > > > > Allow user to provide a context hint. When this is set, KMD will
> > > > > > > send a hint to GuC which results in special handling for this
> > > > > > > context. SLPC will ramp the GT frequency aggressively every time
> > > > > > > it switches to this context. The down freq threshold will also be
> > > > > > > lower so GuC will ramp down the GT freq for this
> > > > > > > context more slowly.
> > > > > > > We also disable waitboost for this context as that
> > > > > > > will interfere with
> > > > > > > the strategy.
> > > > > > > 
> > > > > > > We need to enable the use of Compute strategy during SLPC init, 
> > > > > > > but
> > > > > > > it will apply only to contexts that set this bit during context
> > > > > > > creation.
> > > > > > > 
> > > > > > > Userland can check whether this feature is supported
> > > > > > > using a new param-
> > > > > > > I915_PARAM_HAS_COMPUTE_CONTEXT. This flag is true
> > > > > > > for all guc submission
> > > > > > > enabled platforms since they use SLPC for freq management.
> > > > > > > 
> > > > > > > The Mesa usage model for this flag is here -
> > > > > > > https://gitlab.freedesktop.org/sushmave/mesa/-/commits/compute_hint
> > > > > > 
> > > > > > This allows for setting it for the whole application,
> > > > > > correct? Upsides,
> > > > > > downsides? Are there any plans for per context?
> > > > > 
> > > > > Currently there's no extension on a high level API
> > > > > (Vulkan/OpenGL/OpenCL/etc)
> > > > > that would allow the application to hint for
> > > > > power/freq/latency. So Mesa cannot
> > > > > decide when to hint. So their solution was to use .drirc and
> > > > > make per-application
> > > > > decision.
> > > > > 
> > > > > I would prefer a high level extension for a more granular
> > > > > and informative
> > > > > decision. We need to work with that goal, but for now I don't see any
> > > > > cons on this approach.
> > > > 
> > > > In principle yeah, it doesn't harm to have the option. I am just
> > > > not sure how useful this intermediate step is with its lack
> > > > of intra-process granularity.
> > > > 
> > > > > > > Cc: Rodrigo Vivi 
> > > > > > > Signed-off-by: Vinay Belgaumkar 
> > > > > > > ---
> > > > > > >    drivers/gpu/drm/i915/gem/i915_gem_context.c   |  8 +++
> > > > > > >    .../gpu/drm/i915/gem/i915_gem_context_types.h |  1 +
> > > > > > >    drivers/gpu/drm/i915/gt/intel_rps.c   |  8 +++
> > > > > > >    .../drm/i915/gt/uc/abi/guc_actions_slpc_abi.h | 21 +++
> > > > > > >    drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c   | 17 +++
> > > > > > >    drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h   |  1 +
> > > > > > >    .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  7 +++
> > > > > > >    drivers/gpu/drm/i915/i915_getparam.c  | 11 ++
> > > > > > >    include/uapi/drm/i915_drm.h   | 15 +
> > > > > > >    9 files changed, 89 insertions(+)
> > > > > > > 
> > > > > > > diff --git
> > > > > > > a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > > > > > > b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > > > > > > index dcbfe32fd30c..ceab7dbe9b47 100644
> > > > > > > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > > > > > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > > > > > > @@ -879,6 +879,7 @@ static int
> > > > > > > set_proto_ctx_param(struct drm_i915_file_private
> > > > > > > *fpriv,
> > > > > > >   struct i915_gem_proto_context *pc,
> > > > > > >   struct drm_i915_gem_context_param *args)
> > > > > > >    {
> > > > > > > +    struct drm_i915_private *i915 = fpriv->i915;
> > > > > > >    int ret = 0;
> > > > > > >    switch (args->param) {
> > > > > > > @@ -904,6 +905,13 @@ static int
> > > > > > > set_proto_ctx_param(struct drm_i915_file_private
> > > > > > > *fpriv,
> > > > > > >    pc->user_flags &= ~BIT(UCONTEXT_BANNABLE);
> > > > > > >    break;
> > > > > > > +    case I915_CONTEXT_PARAM_IS_COMPUTE:
> > > > > > > +    if (!intel_uc_uses_guc_submission(&to_gt(i915)->uc))
> > > > > > > +    ret = -EINVAL;
> > > > > > > +    else
> > > > > > > +    pc->user_flags |= BIT(UCONTEXT_COMPUTE);
> > > > > > > +    break;
> > > > > > > +
> > > > > > >    case I915_CONTEXT_PARAM_RECOVERABLE:
> > > > > > >    if (args->size)
> > > > > > >    ret = -EINVAL;
> > > > > > > diff --git
> > > > > > > 

Re: [PATCH] drm/i915/guc: Add Compute context hint

2024-02-23 Thread Belgaumkar, Vinay



On 2/23/2024 12:51 AM, Tvrtko Ursulin wrote:


On 22/02/2024 23:31, Belgaumkar, Vinay wrote:


On 2/22/2024 7:32 AM, Tvrtko Ursulin wrote:


On 21/02/2024 21:28, Rodrigo Vivi wrote:

On Wed, Feb 21, 2024 at 09:42:34AM +, Tvrtko Ursulin wrote:


On 21/02/2024 00:14, Vinay Belgaumkar wrote:

Allow user to provide a context hint. When this is set, KMD will
send a hint to GuC which results in special handling for this
context. SLPC will ramp the GT frequency aggressively every time
it switches to this context. The down freq threshold will also be
lower so GuC will ramp down the GT freq for this context more 
slowly.
We also disable waitboost for this context as that will interfere with
the strategy.

We need to enable the use of Compute strategy during SLPC init, but
it will apply only to contexts that set this bit during context
creation.

Userland can check whether this feature is supported using a new 
param-
I915_PARAM_HAS_COMPUTE_CONTEXT. This flag is true for all guc submission
enabled platforms since they use SLPC for freq management.

The Mesa usage model for this flag is here -
https://gitlab.freedesktop.org/sushmave/mesa/-/commits/compute_hint


This allows for setting it for the whole application, correct? Upsides,
downsides? Are there any plans for per context?


Currently there's no extension on a high level API 
(Vulkan/OpenGL/OpenCL/etc)
that would allow the application to hint for power/freq/latency. So 
Mesa cannot
decide when to hint. So their solution was to use .drirc and make
per-application decision.

I would prefer a high level extension for a more granular and informative
decision. We need to work with that goal, but for now I don't see any
cons on this approach.


In principle yeah, it doesn't harm to have the option. I am just not
sure how useful this intermediate step is with its lack of
intra-process granularity.



Cc: Rodrigo Vivi 
Signed-off-by: Vinay Belgaumkar 
---
   drivers/gpu/drm/i915/gem/i915_gem_context.c   |  8 +++
   .../gpu/drm/i915/gem/i915_gem_context_types.h |  1 +
   drivers/gpu/drm/i915/gt/intel_rps.c   |  8 +++
    .../drm/i915/gt/uc/abi/guc_actions_slpc_abi.h | 21 +++
    drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c   | 17 +++
   drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h   |  1 +
   .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  7 +++
   drivers/gpu/drm/i915/i915_getparam.c  | 11 ++
   include/uapi/drm/i915_drm.h   | 15 +
   9 files changed, 89 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c

index dcbfe32fd30c..ceab7dbe9b47 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -879,6 +879,7 @@ static int set_proto_ctx_param(struct 
drm_i915_file_private *fpriv,

  struct i915_gem_proto_context *pc,
  struct drm_i915_gem_context_param *args)
   {
+    struct drm_i915_private *i915 = fpriv->i915;
   int ret = 0;
   switch (args->param) {
@@ -904,6 +905,13 @@ static int set_proto_ctx_param(struct 
drm_i915_file_private *fpriv,

   pc->user_flags &= ~BIT(UCONTEXT_BANNABLE);
   break;
+    case I915_CONTEXT_PARAM_IS_COMPUTE:
+    if (!intel_uc_uses_guc_submission(&to_gt(i915)->uc))
+    ret = -EINVAL;
+    else
+    pc->user_flags |= BIT(UCONTEXT_COMPUTE);
+    break;
+
   case I915_CONTEXT_PARAM_RECOVERABLE:
   if (args->size)
   ret = -EINVAL;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h

index 03bc7f9d191b..db86d6f6245f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -338,6 +338,7 @@ struct i915_gem_context {
   #define UCONTEXT_BANNABLE    2
   #define UCONTEXT_RECOVERABLE    3
   #define UCONTEXT_PERSISTENCE    4
+#define UCONTEXT_COMPUTE    5


What is the GuC behaviour when SLPC_CTX_FREQ_REQ_IS_COMPUTE is set 
for
non-compute engines? Wondering if per intel_context is what we 
want instead.

(Which could then be the i915_context_param_engines extension to mark
individual contexts as compute strategy.)


Perhaps we should rename this? This is a freq-decision-strategy inside
GuC that is there mostly targeting compute workloads that need lower
latency with short burst execution. But the engine itself doesn't matter.
It can be applied to any engine.


I have no idea if it makes sense for other engines, such as video, 
and what would be pros and cons in terms of PnP. But in the case we 
end up allowing it on any engine, then at least userspace name 
shouldn't be compute. :)
Yes, one of the suggestions from Daniele was to have something along 
the lines of UCONTEXT_HIFREQ or something along those lines so we 
don't confuse it with the Compute Engine.

Re: [PATCH] drm/i915/guc: Add Compute context hint

2024-02-23 Thread Tvrtko Ursulin



On 22/02/2024 23:31, Belgaumkar, Vinay wrote:


On 2/22/2024 7:32 AM, Tvrtko Ursulin wrote:


On 21/02/2024 21:28, Rodrigo Vivi wrote:

On Wed, Feb 21, 2024 at 09:42:34AM +, Tvrtko Ursulin wrote:


On 21/02/2024 00:14, Vinay Belgaumkar wrote:

Allow user to provide a context hint. When this is set, KMD will
send a hint to GuC which results in special handling for this
context. SLPC will ramp the GT frequency aggressively every time
it switches to this context. The down freq threshold will also be
lower so GuC will ramp down the GT freq for this context more slowly.
We also disable waitboost for this context as that will interfere with
the strategy.

We need to enable the use of Compute strategy during SLPC init, but
it will apply only to contexts that set this bit during context
creation.

Userland can check whether this feature is supported using a new 
param-
I915_PARAM_HAS_COMPUTE_CONTEXT. This flag is true for all guc submission
enabled platforms since they use SLPC for freq management.

The Mesa usage model for this flag is here -
https://gitlab.freedesktop.org/sushmave/mesa/-/commits/compute_hint


This allows for setting it for the whole application, correct? Upsides,
downsides? Are there any plans for per context?


Currently there's no extension on a high level API 
(Vulkan/OpenGL/OpenCL/etc)
that would allow the application to hint for power/freq/latency. So 
Mesa cannot
decide when to hint. So their solution was to use .drirc and make
per-application decision.

I would prefer a high level extension for a more granular and informative
decision. We need to work with that goal, but for now I don't see any
cons on this approach.


In principle yeah, it doesn't harm to have the option. I am just not
sure how useful this intermediate step is with its lack of
intra-process granularity.



Cc: Rodrigo Vivi 
Signed-off-by: Vinay Belgaumkar 
---
   drivers/gpu/drm/i915/gem/i915_gem_context.c   |  8 +++
   .../gpu/drm/i915/gem/i915_gem_context_types.h |  1 +
   drivers/gpu/drm/i915/gt/intel_rps.c   |  8 +++
   .../drm/i915/gt/uc/abi/guc_actions_slpc_abi.h | 21 +++
   drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c   | 17 +++
   drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h   |  1 +
   .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  7 +++
   drivers/gpu/drm/i915/i915_getparam.c  | 11 ++
   include/uapi/drm/i915_drm.h   | 15 +
   9 files changed, 89 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c

index dcbfe32fd30c..ceab7dbe9b47 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -879,6 +879,7 @@ static int set_proto_ctx_param(struct 
drm_i915_file_private *fpriv,

  struct i915_gem_proto_context *pc,
  struct drm_i915_gem_context_param *args)
   {
+    struct drm_i915_private *i915 = fpriv->i915;
   int ret = 0;
   switch (args->param) {
@@ -904,6 +905,13 @@ static int set_proto_ctx_param(struct 
drm_i915_file_private *fpriv,

   pc->user_flags &= ~BIT(UCONTEXT_BANNABLE);
   break;
+    case I915_CONTEXT_PARAM_IS_COMPUTE:
+    if (!intel_uc_uses_guc_submission(&to_gt(i915)->uc))
+    ret = -EINVAL;
+    else
+    pc->user_flags |= BIT(UCONTEXT_COMPUTE);
+    break;
+
   case I915_CONTEXT_PARAM_RECOVERABLE:
   if (args->size)
   ret = -EINVAL;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h

index 03bc7f9d191b..db86d6f6245f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -338,6 +338,7 @@ struct i915_gem_context {
   #define UCONTEXT_BANNABLE    2
   #define UCONTEXT_RECOVERABLE    3
   #define UCONTEXT_PERSISTENCE    4
+#define UCONTEXT_COMPUTE    5


What is the GuC behaviour when SLPC_CTX_FREQ_REQ_IS_COMPUTE is set for
non-compute engines? Wondering if per intel_context is what we want 
instead.

(Which could then be the i915_context_param_engines extension to mark
individual contexts as compute strategy.)


Perhaps we should rename this? This is a freq-decision-strategy inside
GuC that is there mostly targeting compute workloads that need lower
latency with short burst execution. But the engine itself doesn't matter.
It can be applied to any engine.


I have no idea if it makes sense for other engines, such as video, and 
what would be pros and cons in terms of PnP. But in the case we end up 
allowing it on any engine, then at least userspace name shouldn't be 
compute. :)
Yes, one of the suggestions from Daniele was to have something along the 
lines of UCONTEXT_HIFREQ or something along those lines so we don't 
confuse it with the Compute Engine.


Okay, but additional question is would anyone 

Re: [PATCH] drm/i915/guc: Add Compute context hint

2024-02-22 Thread Belgaumkar, Vinay



On 2/22/2024 7:32 AM, Tvrtko Ursulin wrote:


On 21/02/2024 21:28, Rodrigo Vivi wrote:

On Wed, Feb 21, 2024 at 09:42:34AM +, Tvrtko Ursulin wrote:


On 21/02/2024 00:14, Vinay Belgaumkar wrote:

Allow user to provide a context hint. When this is set, KMD will
send a hint to GuC which results in special handling for this
context. SLPC will ramp the GT frequency aggressively every time
it switches to this context. The down freq threshold will also be
lower so GuC will ramp down the GT freq for this context more slowly.
We also disable waitboost for this context as that will interfere with
the strategy.

We need to enable the use of Compute strategy during SLPC init, but
it will apply only to contexts that set this bit during context
creation.

Userland can check whether this feature is supported using a new 
param-
I915_PARAM_HAS_COMPUTE_CONTEXT. This flag is true for all guc submission
enabled platforms since they use SLPC for freq management.

The Mesa usage model for this flag is here -
https://gitlab.freedesktop.org/sushmave/mesa/-/commits/compute_hint


This allows for setting it for the whole application, correct? Upsides,
downsides? Are there any plans for per context?


Currently there's no extension on a high level API 
(Vulkan/OpenGL/OpenCL/etc)
that would allow the application to hint for power/freq/latency. So 
Mesa cannot
decide when to hint. So their solution was to use .drirc and make
per-application decision.

I would prefer a high level extension for a more granular and informative
decision. We need to work with that goal, but for now I don't see any
cons on this approach.


In principle yeah, it doesn't harm to have the option. I am just not
sure how useful this intermediate step is with its lack of
intra-process granularity.



Cc: Rodrigo Vivi 
Signed-off-by: Vinay Belgaumkar 
---
   drivers/gpu/drm/i915/gem/i915_gem_context.c   |  8 +++
   .../gpu/drm/i915/gem/i915_gem_context_types.h |  1 +
   drivers/gpu/drm/i915/gt/intel_rps.c   |  8 +++
   .../drm/i915/gt/uc/abi/guc_actions_slpc_abi.h | 21 +++
   drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c   | 17 +++
   drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h   |  1 +
   .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  7 +++
   drivers/gpu/drm/i915/i915_getparam.c  | 11 ++
   include/uapi/drm/i915_drm.h   | 15 +
   9 files changed, 89 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c

index dcbfe32fd30c..ceab7dbe9b47 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -879,6 +879,7 @@ static int set_proto_ctx_param(struct 
drm_i915_file_private *fpriv,

  struct i915_gem_proto_context *pc,
  struct drm_i915_gem_context_param *args)
   {
+    struct drm_i915_private *i915 = fpriv->i915;
   int ret = 0;
   switch (args->param) {
@@ -904,6 +905,13 @@ static int set_proto_ctx_param(struct 
drm_i915_file_private *fpriv,

   pc->user_flags &= ~BIT(UCONTEXT_BANNABLE);
   break;
+    case I915_CONTEXT_PARAM_IS_COMPUTE:
+    if (!intel_uc_uses_guc_submission(&to_gt(i915)->uc))
+    ret = -EINVAL;
+    else
+    pc->user_flags |= BIT(UCONTEXT_COMPUTE);
+    break;
+
   case I915_CONTEXT_PARAM_RECOVERABLE:
   if (args->size)
   ret = -EINVAL;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h

index 03bc7f9d191b..db86d6f6245f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -338,6 +338,7 @@ struct i915_gem_context {
   #define UCONTEXT_BANNABLE    2
   #define UCONTEXT_RECOVERABLE    3
   #define UCONTEXT_PERSISTENCE    4
+#define UCONTEXT_COMPUTE    5


What is the GuC behaviour when SLPC_CTX_FREQ_REQ_IS_COMPUTE is set for
non-compute engines? Wondering if per intel_context is what we want 
instead.

(Which could then be the i915_context_param_engines extension to mark
individual contexts as compute strategy.)


Perhaps we should rename this? This is a freq-decision-strategy inside
GuC that is there mostly targeting compute workloads that need lower
latency with short burst execution. But the engine itself doesn't matter.
It can be applied to any engine.


I have no idea if it makes sense for other engines, such as video, and 
what would be pros and cons in terms of PnP. But in the case we end up 
allowing it on any engine, then at least userspace name shouldn't be 
compute. :)
Yes, one of the suggestions from Daniele was to have something along the 
lines of UCONTEXT_HIFREQ or something along those lines so we don't 
confuse it with the Compute Engine.


Or if we decide to call it compute and only apply to compute engines, 
then I would strongly suggest making the uapi per intel_context i.e. the
set engines extension instead of the GEM context param.

Re: [PATCH] drm/i915/guc: Add Compute context hint

2024-02-22 Thread Tvrtko Ursulin



On 21/02/2024 21:28, Rodrigo Vivi wrote:

On Wed, Feb 21, 2024 at 09:42:34AM +, Tvrtko Ursulin wrote:


On 21/02/2024 00:14, Vinay Belgaumkar wrote:

Allow user to provide a context hint. When this is set, KMD will
send a hint to GuC which results in special handling for this
context. SLPC will ramp the GT frequency aggressively every time
it switches to this context. The down freq threshold will also be
lower so GuC will ramp down the GT freq for this context more slowly.
We also disable waitboost for this context as that will interfere with
the strategy.

We need to enable the use of Compute strategy during SLPC init, but
it will apply only to contexts that set this bit during context
creation.

Userland can check whether this feature is supported using a new param-
I915_PARAM_HAS_COMPUTE_CONTEXT. This flag is true for all guc submission
enabled platforms since they use SLPC for freq management.

The Mesa usage model for this flag is here -
https://gitlab.freedesktop.org/sushmave/mesa/-/commits/compute_hint


This allows for setting it for the whole application, correct? Upsides,
downsides? Are there any plans for per context?


Currently there's no extension on a high level API (Vulkan/OpenGL/OpenCL/etc)
that would allow the application to hint for power/freq/latency. So Mesa cannot
decide when to hint. So their solution was to use .drirc and make 
per-application
decision.

I would prefer a high level extension for a more granular and informative
decision. We need to work with that goal, but for now I don't see any
cons on this approach.


In principle yeah, it doesn't harm to have the option. I am just not sure
how useful this intermediate step is with its lack of intra-process
granularity.



Cc: Rodrigo Vivi 
Signed-off-by: Vinay Belgaumkar 
---
   drivers/gpu/drm/i915/gem/i915_gem_context.c   |  8 +++
   .../gpu/drm/i915/gem/i915_gem_context_types.h |  1 +
   drivers/gpu/drm/i915/gt/intel_rps.c   |  8 +++
   .../drm/i915/gt/uc/abi/guc_actions_slpc_abi.h | 21 +++
   drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c   | 17 +++
   drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h   |  1 +
   .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  7 +++
   drivers/gpu/drm/i915/i915_getparam.c  | 11 ++
   include/uapi/drm/i915_drm.h   | 15 +
   9 files changed, 89 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index dcbfe32fd30c..ceab7dbe9b47 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -879,6 +879,7 @@ static int set_proto_ctx_param(struct drm_i915_file_private 
*fpriv,
   struct i915_gem_proto_context *pc,
   struct drm_i915_gem_context_param *args)
   {
+   struct drm_i915_private *i915 = fpriv->i915;
int ret = 0;
switch (args->param) {
@@ -904,6 +905,13 @@ static int set_proto_ctx_param(struct 
drm_i915_file_private *fpriv,
pc->user_flags &= ~BIT(UCONTEXT_BANNABLE);
break;
+   case I915_CONTEXT_PARAM_IS_COMPUTE:
+   if (!intel_uc_uses_guc_submission(&to_gt(i915)->uc))
+   ret = -EINVAL;
+   else
+   pc->user_flags |= BIT(UCONTEXT_COMPUTE);
+   break;
+
case I915_CONTEXT_PARAM_RECOVERABLE:
if (args->size)
ret = -EINVAL;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index 03bc7f9d191b..db86d6f6245f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -338,6 +338,7 @@ struct i915_gem_context {
   #define UCONTEXT_BANNABLE 2
   #define UCONTEXT_RECOVERABLE 3
   #define UCONTEXT_PERSISTENCE 4
+#define UCONTEXT_COMPUTE   5


What is the GuC behaviour when SLPC_CTX_FREQ_REQ_IS_COMPUTE is set for
non-compute engines? Wondering if per intel_context is what we want instead.
(Which could then be the i915_context_param_engines extension to mark
individual contexts as compute strategy.)


Perhaps we should rename this? This is a freq-decision-strategy inside
GuC that is there mostly targeting compute workloads that need lower
latency with short burst execution. But the engine itself doesn't matter.
It can be applied to any engine.


I have no idea if it makes sense for other engines, such as video, and 
what would be pros and cons in terms of PnP. But in the case we end up 
allowing it on any engine, then at least userspace name shouldn't be 
compute. :)


Or if we decide to call it compute and only apply to compute engines, 
then I would strongly suggest making the uapi per intel_context i.e. the 
set engines extension instead of the GEM context param. Otherwise it 
would be odd 

Re: [PATCH] drm/i915/guc: Add Compute context hint

2024-02-21 Thread Rodrigo Vivi
On Wed, Feb 21, 2024 at 09:42:34AM +, Tvrtko Ursulin wrote:
> 
> On 21/02/2024 00:14, Vinay Belgaumkar wrote:
> > Allow user to provide a context hint. When this is set, KMD will
> > send a hint to GuC which results in special handling for this
> > context. SLPC will ramp the GT frequency aggressively every time
> > it switches to this context. The down freq threshold will also be
> > lower so GuC will ramp down the GT freq for this context more slowly.
> > We also disable waitboost for this context as that will interfere with
> > the strategy.
> > 
> > We need to enable the use of Compute strategy during SLPC init, but
> > it will apply only to contexts that set this bit during context
> > creation.
> > 
> > Userland can check whether this feature is supported using a new param-
> > I915_PARAM_HAS_COMPUTE_CONTEXT. This flag is true for all guc submission
> > enabled platforms since they use SLPC for freq management.
> > 
> > The Mesa usage model for this flag is here -
> > https://gitlab.freedesktop.org/sushmave/mesa/-/commits/compute_hint
> 
> This allows for setting it for the whole application, correct? Upsides,
> downsides? Are there any plans for per context?

Currently there's no extension on a high level API (Vulkan/OpenGL/OpenCL/etc)
that would allow the application to hint for power/freq/latency. So Mesa cannot
decide when to hint. So their solution was to use .drirc and make 
per-application
decision.

I would prefer a high level extension for a more granular and informative
decision. We need to work with that goal, but for now I don't see any
cons on this approach.
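
For reference, a minimal userspace sketch of how an application (or Mesa,
via the .drirc approach above) might probe and set the proposed hint at
context creation. Only I915_PARAM_HAS_COMPUTE_CONTEXT and
I915_CONTEXT_PARAM_IS_COMPUTE come from this patch; everything else is
illustrative and untested, with error handling kept minimal:

#include <stdint.h>
#include <xf86drm.h>
#include <drm/i915_drm.h>

/* Illustrative only: probe the proposed getparam, then create a GEM
 * context with the proposed hint via the setparam create extension. */
static int create_compute_hinted_ctx(int fd, uint32_t *ctx_id)
{
	int has_hint = 0;
	struct drm_i915_getparam gp = {
		.param = I915_PARAM_HAS_COMPUTE_CONTEXT,	/* proposed here */
		.value = &has_hint,
	};
	struct drm_i915_gem_context_create_ext_setparam p = {
		.base = { .name = I915_CONTEXT_CREATE_EXT_SETPARAM },
		.param = {
			.param = I915_CONTEXT_PARAM_IS_COMPUTE,	/* proposed here */
			.value = 1,
		},
	};
	struct drm_i915_gem_context_create_ext create = {
		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
		.extensions = (uintptr_t)&p,
	};

	if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) || !has_hint)
		return -1;
	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, &create))
		return -1;

	*ctx_id = create.ctx_id;
	return 0;
}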

> 
> > Cc: Rodrigo Vivi 
> > Signed-off-by: Vinay Belgaumkar 
> > ---
> >   drivers/gpu/drm/i915/gem/i915_gem_context.c   |  8 +++
> >   .../gpu/drm/i915/gem/i915_gem_context_types.h |  1 +
> >   drivers/gpu/drm/i915/gt/intel_rps.c   |  8 +++
> >   .../drm/i915/gt/uc/abi/guc_actions_slpc_abi.h | 21 +++
> >   drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c   | 17 +++
> >   drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h   |  1 +
> >   .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  7 +++
> >   drivers/gpu/drm/i915/i915_getparam.c  | 11 ++
> >   include/uapi/drm/i915_drm.h   | 15 +
> >   9 files changed, 89 insertions(+)
> > 
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
> > b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > index dcbfe32fd30c..ceab7dbe9b47 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > @@ -879,6 +879,7 @@ static int set_proto_ctx_param(struct 
> > drm_i915_file_private *fpriv,
> >struct i915_gem_proto_context *pc,
> >struct drm_i915_gem_context_param *args)
> >   {
> > +   struct drm_i915_private *i915 = fpriv->i915;
> > int ret = 0;
> > switch (args->param) {
> > @@ -904,6 +905,13 @@ static int set_proto_ctx_param(struct 
> > drm_i915_file_private *fpriv,
> > pc->user_flags &= ~BIT(UCONTEXT_BANNABLE);
> > break;
> > +   case I915_CONTEXT_PARAM_IS_COMPUTE:
> > +   if (!intel_uc_uses_guc_submission(&to_gt(i915)->uc))
> > +   ret = -EINVAL;
> > +   else
> > +   pc->user_flags |= BIT(UCONTEXT_COMPUTE);
> > +   break;
> > +
> > case I915_CONTEXT_PARAM_RECOVERABLE:
> > if (args->size)
> > ret = -EINVAL;
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h 
> > b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
> > index 03bc7f9d191b..db86d6f6245f 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
> > @@ -338,6 +338,7 @@ struct i915_gem_context {
> >   #define UCONTEXT_BANNABLE 2
> >   #define UCONTEXT_RECOVERABLE  3
> >   #define UCONTEXT_PERSISTENCE  4
> > +#define UCONTEXT_COMPUTE   5
> 
> What is the GuC behaviour when SLPC_CTX_FREQ_REQ_IS_COMPUTE is set for
> non-compute engines? Wondering if per intel_context is what we want instead.
> (Which could then be the i915_context_param_engines extension to mark
> individual contexts as compute strategy.)

Perhaps we should rename this? This is a freq-decision-strategy inside
GuC that is there mostly targeting compute workloads that need lower
latency with short burst execution. But the engine itself doesn't matter.
It can be applied to any engine.

> 
> > /**
> >  * @flags: small set of booleans
> > diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
> > b/drivers/gpu/drm/i915/gt/intel_rps.c
> > index 4feef874e6d6..1ed40cd61b70 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_rps.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_rps.c
> > @@ -24,6 +24,7 @@
> >   #include "intel_pcode.h"
> >   #include "intel_rps.h"
> >   #include "vlv_sideband.h"
> > +#include "../gem/i915_gem_context.h"
> >   

Re: [PATCH] drm/i915/guc: Add Compute context hint

2024-02-21 Thread Tvrtko Ursulin



On 21/02/2024 00:14, Vinay Belgaumkar wrote:

Allow user to provide a context hint. When this is set, KMD will
send a hint to GuC which results in special handling for this
context. SLPC will ramp the GT frequency aggressively every time
it switches to this context. The down freq threshold will also be
lower so GuC will ramp down the GT freq for this context more slowly.
We also disable waitboost for this context as that will interfere with
the strategy.

We need to enable the use of Compute strategy during SLPC init, but
it will apply only to contexts that set this bit during context
creation.

Userland can check whether this feature is supported using a new param-
I915_PARAM_HAS_COMPUTE_CONTEXT. This flag is true for all guc submission
enabled platforms since they use SLPC for freq management.

The Mesa usage model for this flag is here -
https://gitlab.freedesktop.org/sushmave/mesa/-/commits/compute_hint


This allows for setting it for the whole application, correct? Upsides, 
downsides? Are there any plans for per context?



Cc: Rodrigo Vivi 
Signed-off-by: Vinay Belgaumkar 
---
  drivers/gpu/drm/i915/gem/i915_gem_context.c   |  8 +++
  .../gpu/drm/i915/gem/i915_gem_context_types.h |  1 +
  drivers/gpu/drm/i915/gt/intel_rps.c   |  8 +++
  .../drm/i915/gt/uc/abi/guc_actions_slpc_abi.h | 21 +++
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c   | 17 +++
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h   |  1 +
  .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  7 +++
  drivers/gpu/drm/i915/i915_getparam.c  | 11 ++
  include/uapi/drm/i915_drm.h   | 15 +
  9 files changed, 89 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index dcbfe32fd30c..ceab7dbe9b47 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -879,6 +879,7 @@ static int set_proto_ctx_param(struct drm_i915_file_private 
*fpriv,
   struct i915_gem_proto_context *pc,
   struct drm_i915_gem_context_param *args)
  {
+   struct drm_i915_private *i915 = fpriv->i915;
int ret = 0;
  
  	switch (args->param) {

@@ -904,6 +905,13 @@ static int set_proto_ctx_param(struct 
drm_i915_file_private *fpriv,
pc->user_flags &= ~BIT(UCONTEXT_BANNABLE);
break;
  
+	case I915_CONTEXT_PARAM_IS_COMPUTE:

+   if (!intel_uc_uses_guc_submission(&to_gt(i915)->uc))
+   ret = -EINVAL;
+   else
+   pc->user_flags |= BIT(UCONTEXT_COMPUTE);
+   break;
+
case I915_CONTEXT_PARAM_RECOVERABLE:
if (args->size)
ret = -EINVAL;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index 03bc7f9d191b..db86d6f6245f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -338,6 +338,7 @@ struct i915_gem_context {
  #define UCONTEXT_BANNABLE 2
  #define UCONTEXT_RECOVERABLE  3
  #define UCONTEXT_PERSISTENCE  4
+#define UCONTEXT_COMPUTE   5


What is the GuC behaviour when SLPC_CTX_FREQ_REQ_IS_COMPUTE is set for 
non-compute engines? Wondering if per intel_context is what we want 
instead. (Which could then be the i915_context_param_engines extension 
to mark individual contexts as compute strategy.)
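
(Purely as an illustration of that alternative, a hypothetical engines
extension in the style of the existing i915_context_engines_* uapi; no
such struct exists, the name and layout are invented here:)

struct i915_context_engines_freq_hint {
	struct i915_user_extension base;	/* hypothetical extension name */
	__u16 engine_index;			/* slot in the engines array to mark */
	__u16 mbz16;				/* reserved, must be zero */
	__u32 flags;				/* e.g. a high-frequency/compute strategy bit */
	__u64 mbz64[4];				/* reserved, must be zero */
};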


  
  	/**

 * @flags: small set of booleans
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
b/drivers/gpu/drm/i915/gt/intel_rps.c
index 4feef874e6d6..1ed40cd61b70 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -24,6 +24,7 @@
  #include "intel_pcode.h"
  #include "intel_rps.h"
  #include "vlv_sideband.h"
+#include "../gem/i915_gem_context.h"
  #include "../../../platform/x86/intel_ips.h"
  
  #define BUSY_MAX_EI	20u /* ms */

@@ -1018,6 +1019,13 @@ void intel_rps_boost(struct i915_request *rq)
	struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps;
  
  		if (rps_uses_slpc(rps)) {

+   const struct i915_gem_context *ctx;
+
+   ctx = i915_request_gem_context(rq);
+   if (ctx &&
+   test_bit(UCONTEXT_COMPUTE, &ctx->user_flags))
+   return;
+


I think request and intel_context do not own a strong reference to GEM 
context. So at minimum you need a local one obtained under a RCU lock 
with kref_get_unless_zero, as some other places do.
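
A rough sketch of that pattern, purely for illustration (not the actual
fix; it assumes the same i915_request_gem_context() accessor used in the
hunk above):

	struct i915_gem_context *ctx;
	bool is_compute = false;

	/* The request does not hold a strong reference to the GEM context,
	 * so take a temporary one before looking at its flags. */
	rcu_read_lock();
	ctx = i915_request_gem_context(rq);
	if (ctx && !kref_get_unless_zero(&ctx->ref))
		ctx = NULL;
	rcu_read_unlock();

	if (ctx) {
		is_compute = test_bit(UCONTEXT_COMPUTE, &ctx->user_flags);
		i915_gem_context_put(ctx);
	}
	if (is_compute)
		return;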


However, it may be simpler to just store the flag in 
intel_context->flags. If you carry it over at the time the GEM context is 
assigned to intel_context, not only do you simplify the runtime rules, but you 
get the ability to not set the compute flag for video etc.
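
A rough sketch of that alternative, with a made-up CONTEXT_HIFREQ_BIT
(name and value invented here purely for illustration; intel_context
already keeps its own flags word):

/* Hypothetical bit in intel_context->flags. */
#define CONTEXT_HIFREQ_BIT	12

/* Called wherever the GEM context is assigned to each intel_context, so
 * per-engine policy (e.g. skipping video engines) can be applied. */
static void intel_context_inherit_hifreq(struct intel_context *ce,
					 const struct i915_gem_context *ctx)
{
	if (test_bit(UCONTEXT_COMPUTE, &ctx->user_flags))
		set_bit(CONTEXT_HIFREQ_BIT, &ce->flags);
}

/* intel_rps_boost() could then test rq->context->flags directly, with no
 * GEM context lookup or reference needed. */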


It may even make 

[PATCH] drm/i915/guc: Add Compute context hint

2024-02-20 Thread Vinay Belgaumkar
Allow user to provide a context hint. When this is set, KMD will
send a hint to GuC which results in special handling for this
context. SLPC will ramp the GT frequency aggressively every time
it switches to this context. The down freq threshold will also be
lower so GuC will ramp down the GT freq for this context more slowly.
We also disable waitboost for this context as that will interfere with
the strategy.

We need to enable the use of Compute strategy during SLPC init, but
it will apply only to contexts that set this bit during context
creation.

Userland can check whether this feature is supported using a new param-
I915_PARAM_HAS_COMPUTE_CONTEXT. This flag is true for all guc submission
enabled platforms since they use SLPC for freq management.

The Mesa usage model for this flag is here -
https://gitlab.freedesktop.org/sushmave/mesa/-/commits/compute_hint

Cc: Rodrigo Vivi 
Signed-off-by: Vinay Belgaumkar 
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |  8 +++
 .../gpu/drm/i915/gem/i915_gem_context_types.h |  1 +
 drivers/gpu/drm/i915/gt/intel_rps.c   |  8 +++
 .../drm/i915/gt/uc/abi/guc_actions_slpc_abi.h | 21 +++
 drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c   | 17 +++
 drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h   |  1 +
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  7 +++
 drivers/gpu/drm/i915/i915_getparam.c  | 11 ++
 include/uapi/drm/i915_drm.h   | 15 +
 9 files changed, 89 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index dcbfe32fd30c..ceab7dbe9b47 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -879,6 +879,7 @@ static int set_proto_ctx_param(struct drm_i915_file_private 
*fpriv,
   struct i915_gem_proto_context *pc,
   struct drm_i915_gem_context_param *args)
 {
+   struct drm_i915_private *i915 = fpriv->i915;
int ret = 0;
 
switch (args->param) {
@@ -904,6 +905,13 @@ static int set_proto_ctx_param(struct 
drm_i915_file_private *fpriv,
pc->user_flags &= ~BIT(UCONTEXT_BANNABLE);
break;
 
+   case I915_CONTEXT_PARAM_IS_COMPUTE:
+   if (!intel_uc_uses_guc_submission(&to_gt(i915)->uc))
+   ret = -EINVAL;
+   else
+   pc->user_flags |= BIT(UCONTEXT_COMPUTE);
+   break;
+
case I915_CONTEXT_PARAM_RECOVERABLE:
if (args->size)
ret = -EINVAL;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index 03bc7f9d191b..db86d6f6245f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -338,6 +338,7 @@ struct i915_gem_context {
 #define UCONTEXT_BANNABLE  2
 #define UCONTEXT_RECOVERABLE   3
 #define UCONTEXT_PERSISTENCE   4
+#define UCONTEXT_COMPUTE   5
 
/**
 * @flags: small set of booleans
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
b/drivers/gpu/drm/i915/gt/intel_rps.c
index 4feef874e6d6..1ed40cd61b70 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -24,6 +24,7 @@
 #include "intel_pcode.h"
 #include "intel_rps.h"
 #include "vlv_sideband.h"
+#include "../gem/i915_gem_context.h"
 #include "../../../platform/x86/intel_ips.h"
 
 #define BUSY_MAX_EI 20u /* ms */
@@ -1018,6 +1019,13 @@ void intel_rps_boost(struct i915_request *rq)
	struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps;
 
if (rps_uses_slpc(rps)) {
+   const struct i915_gem_context *ctx;
+
+   ctx = i915_request_gem_context(rq);
+   if (ctx &&
+   test_bit(UCONTEXT_COMPUTE, &ctx->user_flags))
+   return;
+
slpc = rps_to_slpc(rps);
 
if (slpc->min_freq_softlimit >= slpc->boost_freq)
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h 
b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h
index 811add10c30d..c34674e797c6 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h
@@ -207,6 +207,27 @@ struct slpc_shared_data {
u8 reserved_mode_definition[4096];
 } __packed;
 
+struct slpc_context_frequency_request {
+   u32 frequency_request:16;
+   u32 reserved:12;
+   u32 is_compute:1;
+   u32 ignore_busyness:1;
+   u32 is_minimum:1;
+   u32 is_predefined:1;
+} __packed;
+
+#define SLPC_CTX_FREQ_REQ_IS_COMPUTE   REG_BIT(28)
+
+struct slpc_optimized_strategies {
+   u32 compute:1;
+   u32 async_flip:1;
+   u32 media:1;
+