Re: [Intel-gfx] [PATCH 08/26] drm/i915/guc: Add multi-lrc context registration

2021-10-08 Thread Matthew Brost
On Fri, Oct 08, 2021 at 10:20:16AM -0700, John Harrison wrote:
> On 10/7/2021 12:50, John Harrison wrote:
> > On 10/4/2021 15:06, Matthew Brost wrote:
> > > Add multi-lrc context registration H2G. In addition a workqueue and
> > > process descriptor are setup during multi-lrc context registration as
> > > these data structures are needed for multi-lrc submission.
> > > 
> > > v2:
> > >   (John Harrison)
> > >    - Move GuC specific fields into sub-struct
> > >    - Clean up WQ defines
> > >    - Add comment explaining math to derive WQ / PD address
> > > 
> > > Signed-off-by: Matthew Brost 
> > > ---
> > >   drivers/gpu/drm/i915/gt/intel_context_types.h |  12 ++
> > >   drivers/gpu/drm/i915/gt/intel_lrc.c   |   5 +
> > >   .../gpu/drm/i915/gt/uc/abi/guc_actions_abi.h  |   1 +
> > >   drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   |   2 -
> > >   .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 114 +-
> > >   5 files changed, 131 insertions(+), 3 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h
> > > b/drivers/gpu/drm/i915/gt/intel_context_types.h
> > > index 76dfca57cb45..48decb5ee954 100644
> > > --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> > > +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> > > @@ -239,6 +239,18 @@ struct intel_context {
> > >   struct intel_context *parent;
> > >   /** @number_children: number of children if parent */
> > >   u8 number_children;
> > > +    /** @guc: GuC specific members for parallel submission */
> > > +    struct {
> > > +    /** @wqi_head: head pointer in work queue */
> > > +    u16 wqi_head;
> > > +    /** @wqi_tail: tail pointer in work queue */
> > > +    u16 wqi_tail;
> PS: As per comments on previous rev, something somewhere needs to explicitly
> state what WQI means. One suggestion was to do that here, ideally with maybe
> a brief description of what the queue is, how it is used, etc. Although
> probably it would be better kept in a GuC specific file. E.g. added to
> guc_fwif.h in patch #12.
> 

I think this should just be in the main GuC kernel doc. I can include an
update to the kernel DoC in a patch at the end of the next rev of the
series. That patch doesn't necessarily have to included in the initial
merge of parallel submission if it takes a bit more time to review.

Matt 

> John.
> 
> > > +    /**
> > > + * @parent_page: page in context state (ce->state) used
> > > + * by parent for work queue, process descriptor
> > > + */
> > > +    u8 parent_page;
> > > +    } guc;
> > >   } parallel;
> > >     #ifdef CONFIG_DRM_I915_SELFTEST
> > > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c
> > > b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > > index 3ef9eaf8c50e..57339d5c1fc8 100644
> > > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> > > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > > @@ -942,6 +942,11 @@ __lrc_alloc_state(struct intel_context *ce,
> > > struct intel_engine_cs *engine)
> > >   context_size += PAGE_SIZE;
> > >   }
> > >   +    if (intel_context_is_parent(ce) &&
> > > intel_engine_uses_guc(engine)) {
> > > +    ce->parallel.guc.parent_page = context_size / PAGE_SIZE;
> > > +    context_size += PAGE_SIZE;
> > > +    }
> > > +
> > >   obj = i915_gem_object_create_lmem(engine->i915, context_size,
> > >     I915_BO_ALLOC_PM_VOLATILE);
> > >   if (IS_ERR(obj))
> > > diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
> > > b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
> > > index 8ff58aff..ba10bd374cee 100644
> > > --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
> > > +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
> > > @@ -142,6 +142,7 @@ enum intel_guc_action {
> > >   INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505,
> > >   INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506,
> > >   INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600,
> > > +    INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601,
> > >   INTEL_GUC_ACTION_RESET_CLIENT = 0x5507,
> > >   INTEL_GUC_ACTION_LIMIT
> > >   };
> > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
> > > b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
> > > index fa4be13c8854..0eeb2a9feeed 100644
> > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
> > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
> > > @@ -52,8 +52,6 @@
> > >     #define GUC_DOORBELL_INVALID    256
> > >   -#define GUC_WQ_SIZE    (PAGE_SIZE * 2)
> > > -
> > >   /* Work queue item header definitions */
> > >   #define WQ_STATUS_ACTIVE    1
> > >   #define WQ_STATUS_SUSPENDED    2
> > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > > b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > > index 451d9ae861a6..ab6d7fc1b0b1 100644
> > > --- 

Re: [Intel-gfx] [PATCH 08/26] drm/i915/guc: Add multi-lrc context registration

2021-10-08 Thread John Harrison

On 10/7/2021 12:50, John Harrison wrote:

On 10/4/2021 15:06, Matthew Brost wrote:

Add multi-lrc context registration H2G. In addition a workqueue and
process descriptor are setup during multi-lrc context registration as
these data structures are needed for multi-lrc submission.

v2:
  (John Harrison)
   - Move GuC specific fields into sub-struct
   - Clean up WQ defines
   - Add comment explaining math to derive WQ / PD address

Signed-off-by: Matthew Brost 
---
  drivers/gpu/drm/i915/gt/intel_context_types.h |  12 ++
  drivers/gpu/drm/i915/gt/intel_lrc.c   |   5 +
  .../gpu/drm/i915/gt/uc/abi/guc_actions_abi.h  |   1 +
  drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   |   2 -
  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 114 +-
  5 files changed, 131 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
b/drivers/gpu/drm/i915/gt/intel_context_types.h

index 76dfca57cb45..48decb5ee954 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -239,6 +239,18 @@ struct intel_context {
  struct intel_context *parent;
  /** @number_children: number of children if parent */
  u8 number_children;
+    /** @guc: GuC specific members for parallel submission */
+    struct {
+    /** @wqi_head: head pointer in work queue */
+    u16 wqi_head;
+    /** @wqi_tail: tail pointer in work queue */
+    u16 wqi_tail;
PS: As per comments on previous rev, something somewhere needs to 
explicitly state what WQI means. One suggestion was to do that here, 
ideally with maybe a brief description of what the queue is, how it is 
used, etc. Although probably it would be better kept in a GuC specific 
file. E.g. added to guc_fwif.h in patch #12.


John.


+    /**
+ * @parent_page: page in context state (ce->state) used
+ * by parent for work queue, process descriptor
+ */
+    u8 parent_page;
+    } guc;
  } parallel;
    #ifdef CONFIG_DRM_I915_SELFTEST
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
b/drivers/gpu/drm/i915/gt/intel_lrc.c

index 3ef9eaf8c50e..57339d5c1fc8 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -942,6 +942,11 @@ __lrc_alloc_state(struct intel_context *ce, 
struct intel_engine_cs *engine)

  context_size += PAGE_SIZE;
  }
  +    if (intel_context_is_parent(ce) && 
intel_engine_uses_guc(engine)) {

+    ce->parallel.guc.parent_page = context_size / PAGE_SIZE;
+    context_size += PAGE_SIZE;
+    }
+
  obj = i915_gem_object_create_lmem(engine->i915, context_size,
    I915_BO_ALLOC_PM_VOLATILE);
  if (IS_ERR(obj))
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h 
b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h

index 8ff58aff..ba10bd374cee 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
@@ -142,6 +142,7 @@ enum intel_guc_action {
  INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505,
  INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506,
  INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600,
+    INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601,
  INTEL_GUC_ACTION_RESET_CLIENT = 0x5507,
  INTEL_GUC_ACTION_LIMIT
  };
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h

index fa4be13c8854..0eeb2a9feeed 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -52,8 +52,6 @@
    #define GUC_DOORBELL_INVALID    256
  -#define GUC_WQ_SIZE    (PAGE_SIZE * 2)
-
  /* Work queue item header definitions */
  #define WQ_STATUS_ACTIVE    1
  #define WQ_STATUS_SUSPENDED    2
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c

index 451d9ae861a6..ab6d7fc1b0b1 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -344,6 +344,45 @@ static inline struct i915_priolist 
*to_priolist(struct rb_node *rb)

  return rb_entry(rb, struct i915_priolist, node);
  }
  +/*
+ * When using multi-lrc submission an extra page in the context 
state is

+ * reserved for the process descriptor and work queue.
+ *
+ * The layout of this page is below:
+ * 0    guc_process_desc
+ * ...    unused
+ * PAGE_SIZE / 2    work queue start
+ * ...    work queue
+ * PAGE_SIZE - 1    work queue end
+ */
+#define WQ_SIZE    (PAGE_SIZE / 2)
+#define WQ_OFFSET    (PAGE_SIZE - WQ_SIZE)
I thought you were going with '#define PARENT_SCRATCH SIZE PAGE_SIZE' 
and then using that everywhere else? Unless there is a fundamental 
reason why the above must 

Re: [Intel-gfx] [PATCH 08/26] drm/i915/guc: Add multi-lrc context registration

2021-10-07 Thread Matthew Brost
On Thu, Oct 07, 2021 at 12:50:28PM -0700, John Harrison wrote:
> On 10/4/2021 15:06, Matthew Brost wrote:
> > Add multi-lrc context registration H2G. In addition a workqueue and
> > process descriptor are setup during multi-lrc context registration as
> > these data structures are needed for multi-lrc submission.
> > 
> > v2:
> >   (John Harrison)
> >- Move GuC specific fields into sub-struct
> >- Clean up WQ defines
> >- Add comment explaining math to derive WQ / PD address
> > 
> > Signed-off-by: Matthew Brost 
> > ---
> >   drivers/gpu/drm/i915/gt/intel_context_types.h |  12 ++
> >   drivers/gpu/drm/i915/gt/intel_lrc.c   |   5 +
> >   .../gpu/drm/i915/gt/uc/abi/guc_actions_abi.h  |   1 +
> >   drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   |   2 -
> >   .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 114 +-
> >   5 files changed, 131 insertions(+), 3 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
> > b/drivers/gpu/drm/i915/gt/intel_context_types.h
> > index 76dfca57cb45..48decb5ee954 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> > @@ -239,6 +239,18 @@ struct intel_context {
> > struct intel_context *parent;
> > /** @number_children: number of children if parent */
> > u8 number_children;
> > +   /** @guc: GuC specific members for parallel submission */
> > +   struct {
> > +   /** @wqi_head: head pointer in work queue */
> > +   u16 wqi_head;
> > +   /** @wqi_tail: tail pointer in work queue */
> > +   u16 wqi_tail;
> > +   /**
> > +* @parent_page: page in context state (ce->state) used
> > +* by parent for work queue, process descriptor
> > +*/
> > +   u8 parent_page;
> > +   } guc;
> > } parallel;
> >   #ifdef CONFIG_DRM_I915_SELFTEST
> > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
> > b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > index 3ef9eaf8c50e..57339d5c1fc8 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > @@ -942,6 +942,11 @@ __lrc_alloc_state(struct intel_context *ce, struct 
> > intel_engine_cs *engine)
> > context_size += PAGE_SIZE;
> > }
> > +   if (intel_context_is_parent(ce) && intel_engine_uses_guc(engine)) {
> > +   ce->parallel.guc.parent_page = context_size / PAGE_SIZE;
> > +   context_size += PAGE_SIZE;
> > +   }
> > +
> > obj = i915_gem_object_create_lmem(engine->i915, context_size,
> >   I915_BO_ALLOC_PM_VOLATILE);
> > if (IS_ERR(obj))
> > diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h 
> > b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
> > index 8ff58aff..ba10bd374cee 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
> > +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
> > @@ -142,6 +142,7 @@ enum intel_guc_action {
> > INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505,
> > INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506,
> > INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600,
> > +   INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601,
> > INTEL_GUC_ACTION_RESET_CLIENT = 0x5507,
> > INTEL_GUC_ACTION_LIMIT
> >   };
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h 
> > b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
> > index fa4be13c8854..0eeb2a9feeed 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
> > @@ -52,8 +52,6 @@
> >   #define GUC_DOORBELL_INVALID  256
> > -#define GUC_WQ_SIZE(PAGE_SIZE * 2)
> > -
> >   /* Work queue item header definitions */
> >   #define WQ_STATUS_ACTIVE  1
> >   #define WQ_STATUS_SUSPENDED   2
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
> > b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > index 451d9ae861a6..ab6d7fc1b0b1 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > @@ -344,6 +344,45 @@ static inline struct i915_priolist *to_priolist(struct 
> > rb_node *rb)
> > return rb_entry(rb, struct i915_priolist, node);
> >   }
> > +/*
> > + * When using multi-lrc submission an extra page in the context state is
> > + * reserved for the process descriptor and work queue.
> > + *
> > + * The layout of this page is below:
> > + * 0   guc_process_desc
> > + * ... unused
> > + * PAGE_SIZE / 2   work queue start
> > + * ... work queue
> > + * PAGE_SIZE - 1   work queue 

Re: [Intel-gfx] [PATCH 08/26] drm/i915/guc: Add multi-lrc context registration

2021-10-07 Thread John Harrison

On 10/4/2021 15:06, Matthew Brost wrote:

Add multi-lrc context registration H2G. In addition a workqueue and
process descriptor are setup during multi-lrc context registration as
these data structures are needed for multi-lrc submission.

v2:
  (John Harrison)
   - Move GuC specific fields into sub-struct
   - Clean up WQ defines
   - Add comment explaining math to derive WQ / PD address

Signed-off-by: Matthew Brost 
---
  drivers/gpu/drm/i915/gt/intel_context_types.h |  12 ++
  drivers/gpu/drm/i915/gt/intel_lrc.c   |   5 +
  .../gpu/drm/i915/gt/uc/abi/guc_actions_abi.h  |   1 +
  drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   |   2 -
  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 114 +-
  5 files changed, 131 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 76dfca57cb45..48decb5ee954 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -239,6 +239,18 @@ struct intel_context {
struct intel_context *parent;
/** @number_children: number of children if parent */
u8 number_children;
+   /** @guc: GuC specific members for parallel submission */
+   struct {
+   /** @wqi_head: head pointer in work queue */
+   u16 wqi_head;
+   /** @wqi_tail: tail pointer in work queue */
+   u16 wqi_tail;
+   /**
+* @parent_page: page in context state (ce->state) used
+* by parent for work queue, process descriptor
+*/
+   u8 parent_page;
+   } guc;
} parallel;
  
  #ifdef CONFIG_DRM_I915_SELFTEST

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 3ef9eaf8c50e..57339d5c1fc8 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -942,6 +942,11 @@ __lrc_alloc_state(struct intel_context *ce, struct 
intel_engine_cs *engine)
context_size += PAGE_SIZE;
}
  
+	if (intel_context_is_parent(ce) && intel_engine_uses_guc(engine)) {

+   ce->parallel.guc.parent_page = context_size / PAGE_SIZE;
+   context_size += PAGE_SIZE;
+   }
+
obj = i915_gem_object_create_lmem(engine->i915, context_size,
  I915_BO_ALLOC_PM_VOLATILE);
if (IS_ERR(obj))
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h 
b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
index 8ff58aff..ba10bd374cee 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
@@ -142,6 +142,7 @@ enum intel_guc_action {
INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505,
INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506,
INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600,
+   INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601,
INTEL_GUC_ACTION_RESET_CLIENT = 0x5507,
INTEL_GUC_ACTION_LIMIT
  };
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
index fa4be13c8854..0eeb2a9feeed 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -52,8 +52,6 @@
  
  #define GUC_DOORBELL_INVALID		256
  
-#define GUC_WQ_SIZE			(PAGE_SIZE * 2)

-
  /* Work queue item header definitions */
  #define WQ_STATUS_ACTIVE  1
  #define WQ_STATUS_SUSPENDED   2
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 451d9ae861a6..ab6d7fc1b0b1 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -344,6 +344,45 @@ static inline struct i915_priolist *to_priolist(struct 
rb_node *rb)
return rb_entry(rb, struct i915_priolist, node);
  }
  
+/*

+ * When using multi-lrc submission an extra page in the context state is
+ * reserved for the process descriptor and work queue.
+ *
+ * The layout of this page is below:
+ * 0   guc_process_desc
+ * ... unused
+ * PAGE_SIZE / 2   work queue start
+ * ... work queue
+ * PAGE_SIZE - 1   work queue end
+ */
+#define WQ_SIZE(PAGE_SIZE / 2)
+#define WQ_OFFSET  (PAGE_SIZE - WQ_SIZE)
I thought you were going with '#define PARENT_SCRATCH SIZE PAGE_SIZE' 
and then using that everywhere else? Unless there is a fundamental 
reason why the above must be exactly a page in size then I think the 
size should be defined once and re-used 

[Intel-gfx] [PATCH 08/26] drm/i915/guc: Add multi-lrc context registration

2021-10-04 Thread Matthew Brost
Add multi-lrc context registration H2G. In addition a workqueue and
process descriptor are setup during multi-lrc context registration as
these data structures are needed for multi-lrc submission.

v2:
 (John Harrison)
  - Move GuC specific fields into sub-struct
  - Clean up WQ defines
  - Add comment explaining math to derive WQ / PD address

Signed-off-by: Matthew Brost 
---
 drivers/gpu/drm/i915/gt/intel_context_types.h |  12 ++
 drivers/gpu/drm/i915/gt/intel_lrc.c   |   5 +
 .../gpu/drm/i915/gt/uc/abi/guc_actions_abi.h  |   1 +
 drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   |   2 -
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 114 +-
 5 files changed, 131 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 76dfca57cb45..48decb5ee954 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -239,6 +239,18 @@ struct intel_context {
struct intel_context *parent;
/** @number_children: number of children if parent */
u8 number_children;
+   /** @guc: GuC specific members for parallel submission */
+   struct {
+   /** @wqi_head: head pointer in work queue */
+   u16 wqi_head;
+   /** @wqi_tail: tail pointer in work queue */
+   u16 wqi_tail;
+   /**
+* @parent_page: page in context state (ce->state) used
+* by parent for work queue, process descriptor
+*/
+   u8 parent_page;
+   } guc;
} parallel;
 
 #ifdef CONFIG_DRM_I915_SELFTEST
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 3ef9eaf8c50e..57339d5c1fc8 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -942,6 +942,11 @@ __lrc_alloc_state(struct intel_context *ce, struct 
intel_engine_cs *engine)
context_size += PAGE_SIZE;
}
 
+   if (intel_context_is_parent(ce) && intel_engine_uses_guc(engine)) {
+   ce->parallel.guc.parent_page = context_size / PAGE_SIZE;
+   context_size += PAGE_SIZE;
+   }
+
obj = i915_gem_object_create_lmem(engine->i915, context_size,
  I915_BO_ALLOC_PM_VOLATILE);
if (IS_ERR(obj))
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h 
b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
index 8ff58aff..ba10bd374cee 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
@@ -142,6 +142,7 @@ enum intel_guc_action {
INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505,
INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506,
INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600,
+   INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601,
INTEL_GUC_ACTION_RESET_CLIENT = 0x5507,
INTEL_GUC_ACTION_LIMIT
 };
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
index fa4be13c8854..0eeb2a9feeed 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -52,8 +52,6 @@
 
 #define GUC_DOORBELL_INVALID   256
 
-#define GUC_WQ_SIZE(PAGE_SIZE * 2)
-
 /* Work queue item header definitions */
 #define WQ_STATUS_ACTIVE   1
 #define WQ_STATUS_SUSPENDED2
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 451d9ae861a6..ab6d7fc1b0b1 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -344,6 +344,45 @@ static inline struct i915_priolist *to_priolist(struct 
rb_node *rb)
return rb_entry(rb, struct i915_priolist, node);
 }
 
+/*
+ * When using multi-lrc submission an extra page in the context state is
+ * reserved for the process descriptor and work queue.
+ *
+ * The layout of this page is below:
+ * 0   guc_process_desc
+ * ... unused
+ * PAGE_SIZE / 2   work queue start
+ * ... work queue
+ * PAGE_SIZE - 1   work queue end
+ */
+#define WQ_SIZE(PAGE_SIZE / 2)
+#define WQ_OFFSET  (PAGE_SIZE - WQ_SIZE)
+static u32 __get_process_desc_offset(struct intel_context *ce)
+{
+   GEM_BUG_ON(!ce->parallel.guc.parent_page);
+
+   return ce->parallel.guc.parent_page * PAGE_SIZE;
+}
+
+static u32 __get_wq_offset(struct intel_context *ce)
+{
+   return __get_process_desc_offset(ce) +