[PATCH 04/18] drm/i915/guc: Implement GuC submission tasklet

2021-07-21 Thread Matthew Brost
Implement the GuC submission tasklet for the new interface. The new GuC
interface uses H2G to submit contexts to the GuC. Since H2G uses a single
channel, a single tasklet is used for the submission path.

Also, the per-engine interrupt handler has been updated to disable
rescheduling of the physical engine tasklet when using GuC scheduling,
as the physical engine tasklet is no longer used.

In this patch the guc_id field has been added to intel_context but is
not yet assigned; patches later in the series will assign this value.

v2:
 (John Harrison)
  - Clean up some comments
v3:
 (John Harrison)
  - More comment cleanups

Cc: John Harrison 
Signed-off-by: Matthew Brost 
Reviewed-by: John Harrison 
---
 drivers/gpu/drm/i915/gt/intel_context_types.h |   9 +
 drivers/gpu/drm/i915/gt/uc/intel_guc.h|   4 +
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 231 +-
 3 files changed, 127 insertions(+), 117 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 90026c177105..6d99631d19b9 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -137,6 +137,15 @@ struct intel_context {
struct intel_sseu sseu;
 
u8 wa_bb_page; /* if set, page num reserved for context workarounds */
+
+   /* GuC scheduling state flags that do not require a lock. */
+   atomic_t guc_sched_state_no_lock;
+
+   /*
+    * GuC LRC descriptor ID - not assigned in this patch, but future patches
+    * in the series will assign it.
+    */
+   u16 guc_id;
 };
 
 #endif /* __INTEL_CONTEXT_TYPES__ */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 35783558d261..8c7b92f699f1 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -30,6 +30,10 @@ struct intel_guc {
struct intel_guc_log log;
struct intel_guc_ct ct;
 
+   /* Global engine used to submit requests to GuC */
+   struct i915_sched_engine *sched_engine;
+   struct i915_request *stalled_request;
+
/* intel_guc_recv interrupt related state */
spinlock_t irq_lock;
unsigned int msg_enabled_mask;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 23a94a896a0b..ca0717166a27 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -60,6 +60,31 @@
 
 #define GUC_REQUEST_SIZE 64 /* bytes */
 
+/*
+ * Below is a set of functions which control the GuC scheduling state and do
+ * not require a lock, as all state transitions are mutually exclusive; i.e.
+ * it is not possible for the context pinning code and submission, for the
+ * same context, to be executing simultaneously. We still need an atomic,
+ * though, as it is possible for some of the bits to change at the same time.
+ */
+#define SCHED_STATE_NO_LOCK_ENABLED	BIT(0)
+static inline bool context_enabled(struct intel_context *ce)
+{
+   return (atomic_read(&ce->guc_sched_state_no_lock) &
+   SCHED_STATE_NO_LOCK_ENABLED);
+}
+
+static inline void set_context_enabled(struct intel_context *ce)
+{
+   atomic_or(SCHED_STATE_NO_LOCK_ENABLED, &ce->guc_sched_state_no_lock);
+}
+
+static inline void clr_context_enabled(struct intel_context *ce)
+{
+   atomic_and((u32)~SCHED_STATE_NO_LOCK_ENABLED,
+  &ce->guc_sched_state_no_lock);
+}
+
 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
 {
return rb_entry(rb, struct i915_priolist, node);
@@ -122,37 +147,29 @@ static inline void set_lrc_desc_registered(struct intel_guc *guc, u32 id,
xa_store_irq(&guc->context_lookup, id, ce, GFP_ATOMIC);
 }
 
-static void guc_add_request(struct intel_guc *guc, struct i915_request *rq)
+static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
 {
-   /* Leaving stub as this function will be used in future patches */
-}
+   int err;
+   struct intel_context *ce = rq->context;
+   u32 action[3];
+   int len = 0;
+   bool enabled = context_enabled(ce);
 
-/*
- * When we're doing submissions using regular execlists backend, writing to
- * ELSP from CPU side is enough to make sure that writes to ringbuffer pages
- * pinned in mappable aperture portion of GGTT are visible to command streamer.
- * Writes done by GuC on our behalf are not guaranteeing such ordering,
- * therefore, to ensure the flush, we're issuing a POSTING READ.
- */
-static void flush_ggtt_writes(struct i915_vma *vma)
-{
-   if (i915_vma_is_map_and_fenceable(vma))
-   intel_uncore_posting_read_fw(vma->vm->gt->uncore,
-                                GUC_STATUS);
-}
+   if (!enabled) {
+   action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
+   action[len++] = ce->guc_id;
+   action[len++] =
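
The archive truncates the diff above at this point. For readability, below is
a minimal sketch of how guc_add_request() plausibly continues, inferred from
the visible u32 action[3] array and the context_enabled() helpers; the
INTEL_GUC_ACTION_SCHED_CONTEXT action, the GUC_CONTEXT_ENABLE value and the
intel_guc_send_nb() helper are assumptions from context, not text recovered
from this message.

	/* Sketch only, not the verbatim patch content. */
	if (!enabled) {
		/* First submission: ask the GuC to enable scheduling. */
		action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
		action[len++] = ce->guc_id;
		action[len++] = GUC_CONTEXT_ENABLE;	/* assumed value */
	} else {
		/* Context already enabled: just kick the submission. */
		action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT;	/* assumed action */
		action[len++] = ce->guc_id;
	}

	err = intel_guc_send_nb(guc, action, len);	/* assumed non-blocking send */
	if (!enabled && !err)
		set_context_enabled(ce);

	return err;
}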
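
On the guc_sched_state_no_lock helpers above: each bit has a single writer by
construction (pinning and submission for the same context never run
concurrently), which is why no spinlock is taken, while the atomic
read-modify-write of atomic_or()/atomic_and() keeps concurrent updates to
*different* bits safe. A minimal illustration of extending the pattern, using
a hypothetical second bit that is not part of this patch:

	#define SCHED_STATE_NO_LOCK_EXAMPLE	BIT(1)	/* hypothetical bit */

	static inline void set_context_example(struct intel_context *ce)
	{
		/*
		 * Safe without a lock even if another path flips bit 0
		 * concurrently: atomic_or() is an atomic read-modify-write,
		 * and this bit itself has only one writer.
		 */
		atomic_or(SCHED_STATE_NO_LOCK_EXAMPLE,
			  &ce->guc_sched_state_no_lock);
	}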
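
On the single tasklet from the commit message: because every context is
submitted over the one H2G channel, one tasklet drains the global
guc->sched_engine rather than one tasklet per physical engine. A rough sketch
of the shape such a tasklet takes; the body, the from_tasklet() wiring and the
-EBUSY/stalled_request handling are assumptions, only the sched_engine and
stalled_request fields are visible in the diff above:

	static void guc_submission_tasklet(struct tasklet_struct *t)
	{
		struct i915_sched_engine * const sched_engine =
			from_tasklet(sched_engine, t, tasklet); /* assumes a tasklet member */
		unsigned long flags;

		spin_lock_irqsave(&sched_engine->lock, flags);

		/*
		 * Dequeue requests in priority order and hand each one to
		 * guc_add_request(); if the CT channel is full (-EBUSY), park
		 * the request in guc->stalled_request and retry on the next
		 * kick instead of dropping it.
		 */

		spin_unlock_irqrestore(&sched_engine->lock, flags);
	}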