[Intel-gfx] [PATCH 13/25] drm/i915/guc: Insert submit fences between requests in parent-child relationship

2021-10-14 Thread Matthew Brost
The GuC must receive requests in the order submitted for contexts in a
parent-child relationship to function correctly. To ensure this, insert
a submit fence between the current request and last request submitted
for requests / contexts in a parent child relationship. This is
conceptually similar to a single timeline.

Signed-off-by: Matthew Brost 
Cc: John Harrison 
Reviewed-by: John Harrison 
---
 drivers/gpu/drm/i915/gt/intel_context.h   |   5 +
 drivers/gpu/drm/i915/gt/intel_context_types.h |   6 +
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c |   5 +-
 drivers/gpu/drm/i915/i915_request.c   | 120 ++
 4 files changed, 108 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.h 
b/drivers/gpu/drm/i915/gt/intel_context.h
index 9f0995150a7a..edf12caaade3 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -77,6 +77,11 @@ intel_context_to_parent(struct intel_context *ce)
}
 }
 
+static inline bool intel_context_is_parallel(struct intel_context *ce)
+{
+   return intel_context_is_child(ce) || intel_context_is_parent(ce);
+}
+
 void intel_context_bind_parent_child(struct intel_context *parent,
 struct intel_context *child);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 48decb5ee954..8309d1141d0a 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -237,6 +237,12 @@ struct intel_context {
};
/** @parent: pointer to parent if child */
struct intel_context *parent;
+   /**
+* @last_rq: last request submitted on a parallel context, used
+* to insert submit fences between requests in the parallel
+* context
+*/
+   struct i915_request *last_rq;
/** @number_children: number of children if parent */
u8 number_children;
/** @guc: GuC specific members for parallel submission */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 71ae5eb69849..ebb64fb50396 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -684,8 +684,7 @@ static inline int rq_prio(const struct i915_request *rq)
 
 static bool is_multi_lrc_rq(struct i915_request *rq)
 {
-   return intel_context_is_child(rq->context) ||
-   intel_context_is_parent(rq->context);
+   return intel_context_is_parallel(rq->context);
 }
 
 static bool can_merge_rq(struct i915_request *rq,
@@ -2873,6 +2872,8 @@ static void guc_parent_context_unpin(struct intel_context 
*ce)
GEM_BUG_ON(!intel_context_is_parent(ce));
GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
 
+   if (ce->parallel.last_rq)
+   i915_request_put(ce->parallel.last_rq);
unpin_guc_id(guc, ce);
lrc_unpin(ce);
 }
diff --git a/drivers/gpu/drm/i915/i915_request.c 
b/drivers/gpu/drm/i915/i915_request.c
index c0d27072c28d..8bdf9f2f9b90 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1525,36 +1525,62 @@ i915_request_await_object(struct i915_request *to,
return ret;
 }
 
+static inline bool is_parallel_rq(struct i915_request *rq)
+{
+   return intel_context_is_parallel(rq->context);
+}
+
+static inline struct intel_context *request_to_parent(struct i915_request *rq)
+{
+   return intel_context_to_parent(rq->context);
+}
+
 static struct i915_request *
-__i915_request_add_to_timeline(struct i915_request *rq)
+__i915_request_ensure_parallel_ordering(struct i915_request *rq,
+   struct intel_timeline *timeline)
 {
-   struct intel_timeline *timeline = i915_request_timeline(rq);
struct i915_request *prev;
 
-   /*
-* Dependency tracking and request ordering along the timeline
-* is special cased so that we can eliminate redundant ordering
-* operations while building the request (we know that the timeline
-* itself is ordered, and here we guarantee it).
-*
-* As we know we will need to emit tracking along the timeline,
-* we embed the hooks into our request struct -- at the cost of
-* having to have specialised no-allocation interfaces (which will
-* be beneficial elsewhere).
-*
-* A second benefit to open-coding i915_request_await_request is
-* that we can apply a slight variant of the rules specialised
-* for timelines that jump between engines (such as virtual engines).
-* If we consider the case of virtual engine, we must emit a dma-fence
-* to prevent scheduling of the second request until the first is
-* compl

[Intel-gfx] [PATCH 13/25] drm/i915/guc: Insert submit fences between requests in parent-child relationship

2021-10-13 Thread Matthew Brost
The GuC must receive requests in the order submitted for contexts in a
parent-child relationship to function correctly. To ensure this, insert
a submit fence between the current request and last request submitted
for requests / contexts in a parent child relationship. This is
conceptually similar to a single timeline.

Signed-off-by: Matthew Brost 
Cc: John Harrison 
Reviewed-by: John Harrison 
---
 drivers/gpu/drm/i915/gt/intel_context.h   |   5 +
 drivers/gpu/drm/i915/gt/intel_context_types.h |   6 +
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c |   5 +-
 drivers/gpu/drm/i915/i915_request.c   | 120 ++
 4 files changed, 108 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.h 
b/drivers/gpu/drm/i915/gt/intel_context.h
index b63c10a144af..1bc705f98e2a 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -75,6 +75,11 @@ intel_context_to_parent(struct intel_context *ce)
}
 }
 
+static inline bool intel_context_is_parallel(struct intel_context *ce)
+{
+   return intel_context_is_child(ce) || intel_context_is_parent(ce);
+}
+
 void intel_context_bind_parent_child(struct intel_context *parent,
 struct intel_context *child);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 48decb5ee954..8309d1141d0a 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -237,6 +237,12 @@ struct intel_context {
};
/** @parent: pointer to parent if child */
struct intel_context *parent;
+   /**
+* @last_rq: last request submitted on a parallel context, used
+* to insert submit fences between requests in the parallel
+* context
+*/
+   struct i915_request *last_rq;
/** @number_children: number of children if parent */
u8 number_children;
/** @guc: GuC specific members for parallel submission */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 77591e764195..f690b7c2b295 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -685,8 +685,7 @@ static inline int rq_prio(const struct i915_request *rq)
 
 static bool is_multi_lrc_rq(struct i915_request *rq)
 {
-   return intel_context_is_child(rq->context) ||
-   intel_context_is_parent(rq->context);
+   return intel_context_is_parallel(rq->context);
 }
 
 static bool can_merge_rq(struct i915_request *rq,
@@ -2874,6 +2873,8 @@ static void guc_parent_context_unpin(struct intel_context 
*ce)
GEM_BUG_ON(!intel_context_is_parent(ce));
GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
 
+   if (ce->parallel.last_rq)
+   i915_request_put(ce->parallel.last_rq);
unpin_guc_id(guc, ce);
lrc_unpin(ce);
 }
diff --git a/drivers/gpu/drm/i915/i915_request.c 
b/drivers/gpu/drm/i915/i915_request.c
index c0d27072c28d..8bdf9f2f9b90 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1525,36 +1525,62 @@ i915_request_await_object(struct i915_request *to,
return ret;
 }
 
+static inline bool is_parallel_rq(struct i915_request *rq)
+{
+   return intel_context_is_parallel(rq->context);
+}
+
+static inline struct intel_context *request_to_parent(struct i915_request *rq)
+{
+   return intel_context_to_parent(rq->context);
+}
+
 static struct i915_request *
-__i915_request_add_to_timeline(struct i915_request *rq)
+__i915_request_ensure_parallel_ordering(struct i915_request *rq,
+   struct intel_timeline *timeline)
 {
-   struct intel_timeline *timeline = i915_request_timeline(rq);
struct i915_request *prev;
 
-   /*
-* Dependency tracking and request ordering along the timeline
-* is special cased so that we can eliminate redundant ordering
-* operations while building the request (we know that the timeline
-* itself is ordered, and here we guarantee it).
-*
-* As we know we will need to emit tracking along the timeline,
-* we embed the hooks into our request struct -- at the cost of
-* having to have specialised no-allocation interfaces (which will
-* be beneficial elsewhere).
-*
-* A second benefit to open-coding i915_request_await_request is
-* that we can apply a slight variant of the rules specialised
-* for timelines that jump between engines (such as virtual engines).
-* If we consider the case of virtual engine, we must emit a dma-fence
-* to prevent scheduling of the second request until the first is
-* compl