[Intel-gfx] [PATCH 21/25] drm/i915/guc: Handle errors in multi-lrc requests

2021-10-14 Thread Matthew Brost
If an error occurs in the front end when multi-lrc requests are getting
generated we need to skip these in the backend but we still need to
emit the breadcrumbs seqno. An issues arises because with multi-lrc
breadcrumbs there is a handshake between the parent and children to make
forward progress. If all the requests are not present this handshake
doesn't work. To work around this, if multi-lrc request has an error we
skip the handshake but still emit the breadcrumbs seqno.

v2:
 (John Harrison)
  - Add comment explaining the skipping of the handshake logic
  - Fix typos in the commit message
v3:
 (John Harrison)
  - Fix up some comments about the math to NOP the ring

Signed-off-by: Matthew Brost 
Reviewed-by: John Harrison 
---
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 69 ++-
 1 file changed, 66 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 361fab2cae99..d7710debcd47 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -4070,8 +4070,8 @@ static int 
emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq,
 }
 
 static u32 *
-emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
-u32 *cs)
+__emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
+  u32 *cs)
 {
struct intel_context *ce = rq->context;
u8 i;
@@ -4099,6 +4099,45 @@ emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct 
i915_request *rq,
  get_children_go_addr(ce),
  0);
 
+   return cs;
+}
+
+/*
+ * If this true, a submission of multi-lrc requests had an error and the
+ * requests need to be skipped. The front end (execuf IOCTL) should've called
+ * i915_request_skip which squashes the BB but we still need to emit the fini
+ * breadrcrumbs seqno write. At this point we don't know how many of the
+ * requests in the multi-lrc submission were generated so we can't do the
+ * handshake between the parent and children (e.g. if 4 requests should be
+ * generated but 2nd hit an error only 1 would be seen by the GuC backend).
+ * Simply skip the handshake, but still emit the breadcrumbd seqno, if an error
+ * has occurred on any of the requests in submission / relationship.
+ */
+static inline bool skip_handshake(struct i915_request *rq)
+{
+   return test_bit(I915_FENCE_FLAG_SKIP_PARALLEL, &rq->fence.flags);
+}
+
+static u32 *
+emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
+u32 *cs)
+{
+   struct intel_context *ce = rq->context;
+
+   GEM_BUG_ON(!intel_context_is_parent(ce));
+
+   if (unlikely(skip_handshake(rq))) {
+   /*
+* NOP everything in 
__emit_fini_breadcrumb_parent_no_preempt_mid_batch,
+* the -6 comes from the length of the emits below.
+*/
+   memset(cs, 0, sizeof(u32) *
+  (ce->engine->emit_fini_breadcrumb_dw - 6));
+   cs += ce->engine->emit_fini_breadcrumb_dw - 6;
+   } else {
+   cs = __emit_fini_breadcrumb_parent_no_preempt_mid_batch(rq, cs);
+   }
+
/* Emit fini breadcrumb */
cs = gen8_emit_ggtt_write(cs,
  rq->fence.seqno,
@@ -4115,7 +4154,8 @@ emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct 
i915_request *rq,
 }
 
 static u32 *
-emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, u32 
*cs)
+__emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
+ u32 *cs)
 {
struct intel_context *ce = rq->context;
struct intel_context *parent = intel_context_to_parent(ce);
@@ -4142,6 +4182,29 @@ emit_fini_breadcrumb_child_no_preempt_mid_batch(struct 
i915_request *rq, u32 *cs
*cs++ = get_children_go_addr(parent);
*cs++ = 0;
 
+   return cs;
+}
+
+static u32 *
+emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
+   u32 *cs)
+{
+   struct intel_context *ce = rq->context;
+
+   GEM_BUG_ON(!intel_context_is_child(ce));
+
+   if (unlikely(skip_handshake(rq))) {
+   /*
+* NOP everything in 
__emit_fini_breadcrumb_child_no_preempt_mid_batch,
+* the -6 comes from the length of the emits below.
+*/
+   memset(cs, 0, sizeof(u32) *
+  (ce->engine->emit_fini_breadcrumb_dw - 6));
+   cs += ce->engine->emit_fini_breadcrumb_dw - 6;
+   } else {
+   cs = __emit_fini_breadcrumb_child_no_preempt_mid_batch(rq, cs);
+   }
+
/* Emit fini breadcrumb *

Re: [Intel-gfx] [PATCH 21/25] drm/i915/guc: Handle errors in multi-lrc requests

2021-10-13 Thread John Harrison

On 10/13/2021 13:42, Matthew Brost wrote:

If an error occurs in the front end when multi-lrc requests are getting
generated we need to skip these in the backend but we still need to
emit the breadcrumbs seqno. An issues arises because with multi-lrc
breadcrumbs there is a handshake between the parent and children to make
forward progress. If all the requests are not present this handshake
doesn't work. To work around this, if multi-lrc request has an error we
skip the handshake but still emit the breadcrumbs seqno.

v2:
  (John Harrison)
   - Add comment explaining the skipping of the handshake logic
   - Fix typos in the commit message
v3:
  (John Harrison)
   - Fix up some comments about the math to NOP the ring

Signed-off-by: Matthew Brost 

Reviewed-by: John Harrison 


---
  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 69 ++-
  1 file changed, 66 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index bfafe996e2d2..80d8ce68ff59 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -4076,8 +4076,8 @@ static int 
emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq,
  }
  
  static u32 *

-emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
-u32 *cs)
+__emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
+  u32 *cs)
  {
struct intel_context *ce = rq->context;
u8 i;
@@ -4105,6 +4105,45 @@ emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct 
i915_request *rq,
  get_children_go_addr(ce),
  0);
  
+	return cs;

+}
+
+/*
+ * If this true, a submission of multi-lrc requests had an error and the
+ * requests need to be skipped. The front end (execuf IOCTL) should've called
+ * i915_request_skip which squashes the BB but we still need to emit the fini
+ * breadrcrumbs seqno write. At this point we don't know how many of the
+ * requests in the multi-lrc submission were generated so we can't do the
+ * handshake between the parent and children (e.g. if 4 requests should be
+ * generated but 2nd hit an error only 1 would be seen by the GuC backend).
+ * Simply skip the handshake, but still emit the breadcrumbd seqno, if an error
+ * has occurred on any of the requests in submission / relationship.
+ */
+static inline bool skip_handshake(struct i915_request *rq)
+{
+   return test_bit(I915_FENCE_FLAG_SKIP_PARALLEL, &rq->fence.flags);
+}
+
+static u32 *
+emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
+u32 *cs)
+{
+   struct intel_context *ce = rq->context;
+
+   GEM_BUG_ON(!intel_context_is_parent(ce));
+
+   if (unlikely(skip_handshake(rq))) {
+   /*
+* NOP everything in 
__emit_fini_breadcrumb_parent_no_preempt_mid_batch,
+* the -6 comes from the length of the emits below.
+*/
+   memset(cs, 0, sizeof(u32) *
+  (ce->engine->emit_fini_breadcrumb_dw - 6));
+   cs += ce->engine->emit_fini_breadcrumb_dw - 6;
+   } else {
+   cs = __emit_fini_breadcrumb_parent_no_preempt_mid_batch(rq, cs);
+   }
+
/* Emit fini breadcrumb */
cs = gen8_emit_ggtt_write(cs,
  rq->fence.seqno,
@@ -4121,7 +4160,8 @@ emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct 
i915_request *rq,
  }
  
  static u32 *

-emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, u32 
*cs)
+__emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
+ u32 *cs)
  {
struct intel_context *ce = rq->context;
struct intel_context *parent = intel_context_to_parent(ce);
@@ -4148,6 +4188,29 @@ emit_fini_breadcrumb_child_no_preempt_mid_batch(struct 
i915_request *rq, u32 *cs
*cs++ = get_children_go_addr(parent);
*cs++ = 0;
  
+	return cs;

+}
+
+static u32 *
+emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
+   u32 *cs)
+{
+   struct intel_context *ce = rq->context;
+
+   GEM_BUG_ON(!intel_context_is_child(ce));
+
+   if (unlikely(skip_handshake(rq))) {
+   /*
+* NOP everything in 
__emit_fini_breadcrumb_child_no_preempt_mid_batch,
+* the -6 comes from the length of the emits below.
+*/
+   memset(cs, 0, sizeof(u32) *
+  (ce->engine->emit_fini_breadcrumb_dw - 6));
+   cs += ce->engine->emit_fini_breadcrumb_dw - 6;
+   } else {
+   cs = __emit_fini_breadcrumb_child_no_preempt_mid_batc

[Intel-gfx] [PATCH 21/25] drm/i915/guc: Handle errors in multi-lrc requests

2021-10-13 Thread Matthew Brost
If an error occurs in the front end when multi-lrc requests are getting
generated we need to skip these in the backend but we still need to
emit the breadcrumbs seqno. An issues arises because with multi-lrc
breadcrumbs there is a handshake between the parent and children to make
forward progress. If all the requests are not present this handshake
doesn't work. To work around this, if multi-lrc request has an error we
skip the handshake but still emit the breadcrumbs seqno.

v2:
 (John Harrison)
  - Add comment explaining the skipping of the handshake logic
  - Fix typos in the commit message
v3:
 (John Harrison)
  - Fix up some comments about the math to NOP the ring

Signed-off-by: Matthew Brost 
---
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 69 ++-
 1 file changed, 66 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index bfafe996e2d2..80d8ce68ff59 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -4076,8 +4076,8 @@ static int 
emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq,
 }
 
 static u32 *
-emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
-u32 *cs)
+__emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
+  u32 *cs)
 {
struct intel_context *ce = rq->context;
u8 i;
@@ -4105,6 +4105,45 @@ emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct 
i915_request *rq,
  get_children_go_addr(ce),
  0);
 
+   return cs;
+}
+
+/*
+ * If this true, a submission of multi-lrc requests had an error and the
+ * requests need to be skipped. The front end (execuf IOCTL) should've called
+ * i915_request_skip which squashes the BB but we still need to emit the fini
+ * breadrcrumbs seqno write. At this point we don't know how many of the
+ * requests in the multi-lrc submission were generated so we can't do the
+ * handshake between the parent and children (e.g. if 4 requests should be
+ * generated but 2nd hit an error only 1 would be seen by the GuC backend).
+ * Simply skip the handshake, but still emit the breadcrumbd seqno, if an error
+ * has occurred on any of the requests in submission / relationship.
+ */
+static inline bool skip_handshake(struct i915_request *rq)
+{
+   return test_bit(I915_FENCE_FLAG_SKIP_PARALLEL, &rq->fence.flags);
+}
+
+static u32 *
+emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
+u32 *cs)
+{
+   struct intel_context *ce = rq->context;
+
+   GEM_BUG_ON(!intel_context_is_parent(ce));
+
+   if (unlikely(skip_handshake(rq))) {
+   /*
+* NOP everything in 
__emit_fini_breadcrumb_parent_no_preempt_mid_batch,
+* the -6 comes from the length of the emits below.
+*/
+   memset(cs, 0, sizeof(u32) *
+  (ce->engine->emit_fini_breadcrumb_dw - 6));
+   cs += ce->engine->emit_fini_breadcrumb_dw - 6;
+   } else {
+   cs = __emit_fini_breadcrumb_parent_no_preempt_mid_batch(rq, cs);
+   }
+
/* Emit fini breadcrumb */
cs = gen8_emit_ggtt_write(cs,
  rq->fence.seqno,
@@ -4121,7 +4160,8 @@ emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct 
i915_request *rq,
 }
 
 static u32 *
-emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, u32 
*cs)
+__emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
+ u32 *cs)
 {
struct intel_context *ce = rq->context;
struct intel_context *parent = intel_context_to_parent(ce);
@@ -4148,6 +4188,29 @@ emit_fini_breadcrumb_child_no_preempt_mid_batch(struct 
i915_request *rq, u32 *cs
*cs++ = get_children_go_addr(parent);
*cs++ = 0;
 
+   return cs;
+}
+
+static u32 *
+emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
+   u32 *cs)
+{
+   struct intel_context *ce = rq->context;
+
+   GEM_BUG_ON(!intel_context_is_child(ce));
+
+   if (unlikely(skip_handshake(rq))) {
+   /*
+* NOP everything in 
__emit_fini_breadcrumb_child_no_preempt_mid_batch,
+* the -6 comes from the length of the emits below.
+*/
+   memset(cs, 0, sizeof(u32) *
+  (ce->engine->emit_fini_breadcrumb_dw - 6));
+   cs += ce->engine->emit_fini_breadcrumb_dw - 6;
+   } else {
+   cs = __emit_fini_breadcrumb_child_no_preempt_mid_batch(rq, cs);
+   }
+
/* Emit fini breadcrumb */
cs = gen8_emit_ggt