On 26/05/2020 10:07, Chris Wilson wrote:
Reorder the code so that we can reuse await_execution from a special case
in await_request in the next patch.

Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
---
  drivers/gpu/drm/i915/i915_request.c | 264 ++++++++++++++--------------
  1 file changed, 132 insertions(+), 132 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index c282719ad3ac..33bbad623e02 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1053,37 +1053,91 @@ emit_semaphore_wait(struct i915_request *to,
                                             I915_FENCE_GFP);
  }

+static bool intel_timeline_sync_has_start(struct intel_timeline *tl,
+                                         struct dma_fence *fence)
+{
+       return __intel_timeline_sync_is_later(tl,
+                                             fence->context,
+                                             fence->seqno - 1);
+}
+
+static int intel_timeline_sync_set_start(struct intel_timeline *tl,
+                                        const struct dma_fence *fence)
+{
+       return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1);
+}
+
  static int
-i915_request_await_request(struct i915_request *to, struct i915_request *from)
+__i915_request_await_execution(struct i915_request *to,
+                              struct i915_request *from,
+                              void (*hook)(struct i915_request *rq,
+                                           struct dma_fence *signal))
  {
-       int ret;
+       int err;

-       GEM_BUG_ON(to == from);
-       GEM_BUG_ON(to->timeline == from->timeline);
+       GEM_BUG_ON(intel_context_is_barrier(from->context));

-       if (i915_request_completed(from)) {
-               i915_sw_fence_set_error_once(&to->submit, from->fence.error);
+       /* Submit both requests at the same time */
+       err = __await_execution(to, from, hook, I915_FENCE_GFP);
+       if (err)
+               return err;
+
+       /* Squash repeated dependencies to the same timelines */
+       if (intel_timeline_sync_has_start(i915_request_timeline(to),
+                                         &from->fence))
                return 0;
+
+       /*
+        * Wait until the start of this request.
+        *
+        * The execution cb fires when we submit the request to HW. But in
+        * many cases this may be long before the request itself is ready to
+        * run (consider that we submit 2 requests for the same context, where
+        * the request of interest is behind an indefinite spinner). So we hook
+        * up to both to reduce our queues and keep the execution lag minimised
+        * in the worst case, though we hope that the await_start is elided.
+        */
+       err = i915_request_await_start(to, from);
+       if (err < 0)
+               return err;
+
+       /*
+        * Ensure both start together [after all semaphores in signal]
+        *
+        * Now that we are queued to the HW at roughly the same time (thanks
+        * to the execute cb) and are ready to run at roughly the same time
+        * (thanks to the await start), our signaler may still be indefinitely
+        * delayed by waiting on a semaphore from a remote engine. If our
+        * signaler depends on a semaphore, so indirectly do we, and we do not
+        * want to start our payload until our signaler also starts theirs.
+        * So we wait.
+        *
+        * However, there is also a second condition for which we need to wait
+        * for the precise start of the signaler. Consider that the signaler
+        * was submitted in a chain of requests following another context
+        * (with just an ordinary intra-engine fence dependency between the
+        * two). In this case the signaler is queued to HW, but not for
+        * immediate execution, and so we must wait until it reaches the
+        * active slot.
+        */
+       if (intel_engine_has_semaphores(to->engine) &&
+           !i915_request_has_initial_breadcrumb(to)) {
+               err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
+               if (err < 0)
+                       return err;
        }

+       /* Couple the dependency tree for PI on this exposed to->fence */
        if (to->engine->schedule) {
-               ret = i915_sched_node_add_dependency(&to->sched,
+               err = i915_sched_node_add_dependency(&to->sched,
                                                     &from->sched,
-                                                    I915_DEPENDENCY_EXTERNAL);
-               if (ret < 0)
-                       return ret;
+                                                    I915_DEPENDENCY_WEAK);
+               if (err < 0)
+                       return err;
        }

-       if (to->engine == from->engine)
-               ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
-                                                      &from->submit,
-                                                      I915_FENCE_GFP);
-       else
-               ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
-       if (ret < 0)
-               return ret;
-
-       return 0;
+       return intel_timeline_sync_set_start(i915_request_timeline(to),
+                                            &from->fence);
  }

  static void mark_external(struct i915_request *rq)
@@ -1136,23 +1190,20 @@ i915_request_await_external(struct i915_request *rq, struct dma_fence *fence)
  }

  int
-i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
+i915_request_await_execution(struct i915_request *rq,
+                            struct dma_fence *fence,
+                            void (*hook)(struct i915_request *rq,
+                                         struct dma_fence *signal))
  {
        struct dma_fence **child = &fence;
        unsigned int nchild = 1;
        int ret;

-       /*
-        * Note that if the fence-array was created in signal-on-any mode,
-        * we should *not* decompose it into its individual fences. However,
-        * we don't currently store which mode the fence-array is operating
-        * in. Fortunately, the only user of signal-on-any is private to
-        * amdgpu and we should not see any incoming fence-array from
-        * sync-file being in signal-on-any mode.
-        */
        if (dma_fence_is_array(fence)) {
                struct dma_fence_array *array = to_dma_fence_array(fence);

+               /* XXX Error for signal-on-any fence arrays */
+
                child = array->fences;
                nchild = array->num_fences;
                GEM_BUG_ON(!nchild);
@@ -1165,138 +1216,78 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
                        continue;
                }

-               /*
-                * Requests on the same timeline are explicitly ordered, along
-                * with their dependencies, by i915_request_add() which ensures
-                * that requests are submitted in-order through each ring.
-                */
                if (fence->context == rq->fence.context)
                        continue;

-               /* Squash repeated waits to the same timelines */
-               if (fence->context &&
-                   intel_timeline_sync_is_later(i915_request_timeline(rq),
-                                                fence))
-                       continue;
+               /*
+                * We don't squash repeated fence dependencies here as we
+                * want to run our callback in all cases.
+                */

                if (dma_fence_is_i915(fence))
-                       ret = i915_request_await_request(rq, to_request(fence));
+                       ret = __i915_request_await_execution(rq,
+                                                            to_request(fence),
+                                                            hook);
                else
                        ret = i915_request_await_external(rq, fence);
                if (ret < 0)
                        return ret;
-
-               /* Record the latest fence used against each timeline */
-               if (fence->context)
-                       intel_timeline_sync_set(i915_request_timeline(rq),
-                                               fence);
        } while (--nchild);

        return 0;
  }

-static bool intel_timeline_sync_has_start(struct intel_timeline *tl,
-                                         struct dma_fence *fence)
-{
-       return __intel_timeline_sync_is_later(tl,
-                                             fence->context,
-                                             fence->seqno - 1);
-}
-
-static int intel_timeline_sync_set_start(struct intel_timeline *tl,
-                                        const struct dma_fence *fence)
-{
-       return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1);
-}
-
  static int
-__i915_request_await_execution(struct i915_request *to,
-                              struct i915_request *from,
-                              void (*hook)(struct i915_request *rq,
-                                           struct dma_fence *signal))
+i915_request_await_request(struct i915_request *to, struct i915_request *from)
  {
-       int err;
-
-       GEM_BUG_ON(intel_context_is_barrier(from->context));
+       int ret;

-       /* Submit both requests at the same time */
-       err = __await_execution(to, from, hook, I915_FENCE_GFP);
-       if (err)
-               return err;
+       GEM_BUG_ON(to == from);
+       GEM_BUG_ON(to->timeline == from->timeline);

-       /* Squash repeated depenendices to the same timelines */
-       if (intel_timeline_sync_has_start(i915_request_timeline(to),
-                                         &from->fence))
+       if (i915_request_completed(from)) {
+               i915_sw_fence_set_error_once(&to->submit, from->fence.error);
                return 0;
-
-       /*
-        * Wait until the start of this request.
-        *
-        * The execution cb fires when we submit the request to HW. But in
-        * many cases this may be long before the request itself is ready to
-        * run (consider that we submit 2 requests for the same context, where
-        * the request of interest is behind an indefinite spinner). So we hook
-        * up to both to reduce our queues and keep the execution lag minimised
-        * in the worst case, though we hope that the await_start is elided.
-        */
-       err = i915_request_await_start(to, from);
-       if (err < 0)
-               return err;
-
-       /*
-        * Ensure both start together [after all semaphores in signal]
-        *
-        * Now that we are queued to the HW at roughly the same time (thanks
-        * to the execute cb) and are ready to run at roughly the same time
-        * (thanks to the await start), our signaler may still be indefinitely
-        * delayed by waiting on a semaphore from a remote engine. If our
-        * signaler depends on a semaphore, so indirectly do we, and we do not
-        * want to start our payload until our signaler also starts theirs.
-        * So we wait.
-        *
-        * However, there is also a second condition for which we need to wait
-        * for the precise start of the signaler. Consider that the signaler
-        * was submitted in a chain of requests following another context
-        * (with just an ordinary intra-engine fence dependency between the
-        * two). In this case the signaler is queued to HW, but not for
-        * immediate execution, and so we must wait until it reaches the
-        * active slot.
-        */
-       if (intel_engine_has_semaphores(to->engine) &&
-           !i915_request_has_initial_breadcrumb(to)) {
-               err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
-               if (err < 0)
-                       return err;
        }

-       /* Couple the dependency tree for PI on this exposed to->fence */
        if (to->engine->schedule) {
-               err = i915_sched_node_add_dependency(&to->sched,
+               ret = i915_sched_node_add_dependency(&to->sched,
                                                     &from->sched,
-                                                    I915_DEPENDENCY_WEAK);
-               if (err < 0)
-                       return err;
+                                                    I915_DEPENDENCY_EXTERNAL);
+               if (ret < 0)
+                       return ret;
        }

-       return intel_timeline_sync_set_start(i915_request_timeline(to),
-                                            &from->fence);
+       if (to->engine == READ_ONCE(from->engine))
+               ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
+                                                      &from->submit,
+                                                      I915_FENCE_GFP);
+       else
+               ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
+       if (ret < 0)
+               return ret;
+
+       return 0;
  }

  int
-i915_request_await_execution(struct i915_request *rq,
-                            struct dma_fence *fence,
-                            void (*hook)(struct i915_request *rq,
-                                         struct dma_fence *signal))
+i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
  {
        struct dma_fence **child = &fence;
        unsigned int nchild = 1;
        int ret;

+       /*
+        * Note that if the fence-array was created in signal-on-any mode,
+        * we should *not* decompose it into its individual fences. However,
+        * we don't currently store which mode the fence-array is operating
+        * in. Fortunately, the only user of signal-on-any is private to
+        * amdgpu and we should not see any incoming fence-array from
+        * sync-file being in signal-on-any mode.
+        */
        if (dma_fence_is_array(fence)) {
                struct dma_fence_array *array = to_dma_fence_array(fence);

-               /* XXX Error for signal-on-any fence arrays */
-
                child = array->fences;
                nchild = array->num_fences;
                GEM_BUG_ON(!nchild);
@@ -1309,22 +1300,31 @@ i915_request_await_execution(struct i915_request *rq,
                        continue;
                }

+               /*
+                * Requests on the same timeline are explicitly ordered, along
+                * with their dependencies, by i915_request_add() which ensures
+                * that requests are submitted in-order through each ring.
+                */
                if (fence->context == rq->fence.context)
                        continue;

-               /*
-                * We don't squash repeated fence dependencies here as we
-                * want to run our callback in all cases.
-                */
+               /* Squash repeated waits to the same timelines */
+               if (fence->context &&
+                   intel_timeline_sync_is_later(i915_request_timeline(rq),
+                                                fence))
+                       continue;

                if (dma_fence_is_i915(fence))
-                       ret = __i915_request_await_execution(rq,
-                                                            to_request(fence),
-                                                            hook);
+                       ret = i915_request_await_request(rq, to_request(fence));
                else
                        ret = i915_request_await_external(rq, fence);
                if (ret < 0)
                        return ret;
+
+               /* Record the latest fence used against each timeline */
+               if (fence->context)
+                       intel_timeline_sync_set(i915_request_timeline(rq),
+                                               fence);
        } while (--nchild);

        return 0;
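
As an aside for anyone following the seqno arithmetic above: the start of
request <context, seqno> is tracked as seqno - 1 in the timeline sync map,
which is what lets intel_timeline_sync_has_start() squash repeated execution
dependencies without mistaking them for completion waits. A toy,
self-contained C model of that bookkeeping (the fixed-size table is only a
stand-in for the timeline syncmap, and the helper names are illustrative,
not the kernel API):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Toy sync map: latest seqno recorded per fence context. */
struct sync_map {
        uint64_t ctx[16];
        uint32_t seqno[16];
        int count;
};

static bool sync_is_later(const struct sync_map *m, uint64_t ctx,
                          uint32_t seqno)
{
        for (int i = 0; i < m->count; i++)
                if (m->ctx[i] == ctx) /* wraparound-safe comparison */
                        return (int32_t)(m->seqno[i] - seqno) >= 0;
        return false;
}

static void sync_set(struct sync_map *m, uint64_t ctx, uint32_t seqno)
{
        for (int i = 0; i < m->count; i++) {
                if (m->ctx[i] == ctx) {
                        m->seqno[i] = seqno;
                        return;
                }
        }
        m->ctx[m->count] = ctx;
        m->seqno[m->count] = seqno;
        m->count++;
}

/* The patch's trick: the "start" of request <ctx, seqno> is seqno - 1. */
static bool sync_has_start(const struct sync_map *m, uint64_t ctx,
                           uint32_t seqno)
{
        return sync_is_later(m, ctx, seqno - 1);
}

static void sync_set_start(struct sync_map *m, uint64_t ctx, uint32_t seqno)
{
        sync_set(m, ctx, seqno - 1);
}

int main(void)
{
        struct sync_map m = { .count = 0 };

        sync_set_start(&m, 1, 2); /* awaited the start of request 1:2 */
        printf("has_start: %d\n", sync_has_start(&m, 1, 2)); /* 1: squash */
        printf("is_later:  %d\n", sync_is_later(&m, 1, 2));  /* 0: not completed */
        return 0;
}

The signed delta in sync_is_later() mirrors the usual seqno wraparound
comparison; the real __intel_timeline_sync_is_later()/__intel_timeline_sync_set()
record the same information in the timeline's syncmap.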

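The semaphore wait on from->fence.seqno - 1 is the same identity seen from
the hardware side: within a single timeline, request N has started once the
breadcrumb for N - 1 has been written. A minimal sketch of that predicate
(using the conventional signed seqno comparison; "hwsp" below is just a
local variable standing in for the per-timeline HW status page):

#include <stdint.h>
#include <stdio.h>

/* The HW writes the seqno of the last completed request to the HWSP. */
static int seqno_passed(uint32_t hwsp, uint32_t seqno)
{
        return (int32_t)(hwsp - seqno) >= 0; /* wraparound-safe */
}

int main(void)
{
        uint32_t hwsp = 1; /* request 1 on this timeline has completed */

        /* Request 2 has started once its predecessor (2 - 1) has passed... */
        printf("started:   %d\n", seqno_passed(hwsp, 2 - 1)); /* 1 */
        /* ...but request 2 itself has not yet completed. */
        printf("completed: %d\n", seqno_passed(hwsp, 2));     /* 0 */
        return 0;
}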

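And the signal-on-any caveat carried in the restored comment is easy to
demonstrate: decomposing a fence array means awaiting every child, which is
strictly stronger than, and therefore wrong for, an array that signals as
soon as any one child fires. A toy model, nothing i915-specific:

#include <stdbool.h>
#include <stdio.h>

/* Signaled flags standing in for the children of a dma_fence_array. */
static bool wait_all(const bool *signaled, int n)
{
        for (int i = 0; i < n; i++)
                if (!signaled[i])
                        return false; /* a decomposed wait blocks here */
        return true;
}

static bool wait_any(const bool *signaled, int n)
{
        for (int i = 0; i < n; i++)
                if (signaled[i])
                        return true; /* signal-on-any is already satisfied */
        return false;
}

int main(void)
{
        bool children[2] = { true, false }; /* only one child has signaled */

        printf("signal-on-any satisfied: %d\n", wait_any(children, 2)); /* 1 */
        printf("decomposed (wait-all):   %d\n", wait_all(children, 2)); /* 0 */
        return 0;
}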
Reviewed-by: Tvrtko Ursulin <tvrtko.ursu...@intel.com>

Regards,

Tvrtko