Re: [Intel-gfx] [PATCH] drm/i915/execlists: Weak parallel submission support for execlists
On 12/22/2021 14:35, Matthew Brost wrote: A weak implementation of parallel submission (multi-bb execbuf IOCTL) for execlists. Doing as little as possible to support this interface for execlists - basically just passing submit fences between each request generated and virtual engines are not allowed. This is on par with what is there for the existing (hopefully soon deprecated) bonding interface. We perma-pin these execlists contexts to align with GuC implementation. v2: (John Harrison) - Drop siblings array as num_siblings must be 1 v3: (John Harrison) - Drop single submission v4: (John Harrison) - Actually drop single submission - Use IS_ERR check on return value from intel_context_create - Set last request to NULL on unpin Signed-off-by: Matthew Brost --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 11 -- drivers/gpu/drm/i915/gt/intel_context.c | 4 +- .../drm/i915/gt/intel_execlists_submission.c | 38 +++ drivers/gpu/drm/i915/gt/intel_lrc.c | 4 ++ .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 - 5 files changed, 51 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index cad3f0b2be9e..b0d2d81fc3b3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -570,10 +570,6 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, struct intel_engine_cs **siblings = NULL; intel_engine_mask_t prev_mask; - /* FIXME: This is NIY for execlists */ - if (!(intel_uc_uses_guc_submission(&to_gt(i915)->uc))) - return -ENODEV; - if (get_user(slot, &ext->engine_index)) return -EFAULT; @@ -583,6 +579,13 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, if (get_user(num_siblings, &ext->num_siblings)) return -EFAULT; + if (!intel_uc_uses_guc_submission(&to_gt(i915)->uc) && + num_siblings != 1) { + drm_dbg(&i915->drm, "Only 1 sibling (%d) supported in non-GuC mode\n", + num_siblings); + return 
-EINVAL; + } + if (slot >= set->num_engines) { drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n", slot, set->num_engines); diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index ba083d800a08..5d0ec7c49b6a 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -79,7 +79,8 @@ static int intel_context_active_acquire(struct intel_context *ce) __i915_active_acquire(&ce->active); - if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine)) + if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine) || + intel_context_is_parallel(ce)) return 0; /* Preallocate tracking nodes */ @@ -563,7 +564,6 @@ void intel_context_bind_parent_child(struct intel_context *parent, * Callers responsibility to validate that this function is used * correctly but we use GEM_BUG_ON here ensure that they do. */ - GEM_BUG_ON(!intel_engine_uses_guc(parent->engine)); GEM_BUG_ON(intel_context_is_pinned(parent)); GEM_BUG_ON(intel_context_is_child(parent)); GEM_BUG_ON(intel_context_is_pinned(child)); diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index a69df5e9e77a..be56d0b41892 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -2599,6 +2599,43 @@ static void execlists_context_cancel_request(struct intel_context *ce, current->comm); } +static struct intel_context * +execlists_create_parallel(struct intel_engine_cs **engines, + unsigned int num_siblings, + unsigned int width) +{ + struct intel_context *parent = NULL, *ce, *err; + int i; + + GEM_BUG_ON(num_siblings != 1); + + for (i = 0; i < width; ++i) { + ce = intel_context_create(engines[i]); + if (IS_ERR(ce)) { + err = ce; Could get rid of 'err' and just say 'return ce;' at the end of 'unwind:'. 
Either way: Reviewed-by: John Harrison + goto unwind; + } + + if (i == 0) + parent = ce; + else + intel_context_bind_parent_child(parent, ce); + } + + parent->parallel.fence_context = dma_fence_context_alloc(1); + + intel_context_set_nopreempt(parent); + for_each_child(parent, ce) + intel_context_set_nopreempt(ce); + + return parent; + +unwind: +
Re: [Intel-gfx] [PATCH] drm/i915/execlists: Weak parallel submission support for execlists
On Mon, Dec 06, 2021 at 12:01:04PM -0800, John Harrison wrote: > On 11/11/2021 13:20, Matthew Brost wrote: > > A weak implementation of parallel submission (multi-bb execbuf IOCTL) for > > execlists. Doing as little as possible to support this interface for > > execlists - basically just passing submit fences between each request > > generated and virtual engines are not allowed. This is on par with what > > is there for the existing (hopefully soon deprecated) bonding interface. > > > > We perma-pin these execlists contexts to align with GuC implementation. > > > > v2: > > (John Harrison) > >- Drop siblings array as num_siblings must be 1 > > v3: > > (John Harrison) > >- Drop single submission > > > > Signed-off-by: Matthew Brost > > --- > > drivers/gpu/drm/i915/gem/i915_gem_context.c | 10 +++-- > > drivers/gpu/drm/i915/gt/intel_context.c | 4 +- > > .../drm/i915/gt/intel_execlists_submission.c | 40 +++ > > drivers/gpu/drm/i915/gt/intel_lrc.c | 2 + > > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 - > > 5 files changed, 50 insertions(+), 8 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c > > b/drivers/gpu/drm/i915/gem/i915_gem_context.c > > index ebd775cb1661c..d7bf6c8f70b7b 100644 > > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c > > @@ -570,10 +570,6 @@ set_proto_ctx_engines_parallel_submit(struct > > i915_user_extension __user *base, > > struct intel_engine_cs **siblings = NULL; > > intel_engine_mask_t prev_mask; > > - /* FIXME: This is NIY for execlists */ > > - if (!(intel_uc_uses_guc_submission(&i915->gt.uc))) > > - return -ENODEV; > > - > > if (get_user(slot, &ext->engine_index)) > > return -EFAULT; > > @@ -583,6 +579,12 @@ set_proto_ctx_engines_parallel_submit(struct > > i915_user_extension __user *base, > > if (get_user(num_siblings, &ext->num_siblings)) > > return -EFAULT; > > + if (!intel_uc_uses_guc_submission(&i915->gt.uc) && num_siblings != 1) { > > + 
drm_dbg(&i915->drm, "Only 1 sibling (%d) supported in non-GuC > > mode\n", > > + num_siblings); > > + return -EINVAL; > > + } > > + > > if (slot >= set->num_engines) { > > drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n", > > slot, set->num_engines); > > diff --git a/drivers/gpu/drm/i915/gt/intel_context.c > > b/drivers/gpu/drm/i915/gt/intel_context.c > > index 5634d14052bc9..1bec92e1d8e63 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_context.c > > +++ b/drivers/gpu/drm/i915/gt/intel_context.c > > @@ -79,7 +79,8 @@ static int intel_context_active_acquire(struct > > intel_context *ce) > > __i915_active_acquire(&ce->active); > > - if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine)) > > + if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine) || > > + intel_context_is_parallel(ce)) > > return 0; > > /* Preallocate tracking nodes */ > > @@ -563,7 +564,6 @@ void intel_context_bind_parent_child(struct > > intel_context *parent, > > * Callers responsibility to validate that this function is used > > * correctly but we use GEM_BUG_ON here ensure that they do. 
> > */ > > - GEM_BUG_ON(!intel_engine_uses_guc(parent->engine)); > > GEM_BUG_ON(intel_context_is_pinned(parent)); > > GEM_BUG_ON(intel_context_is_child(parent)); > > GEM_BUG_ON(intel_context_is_pinned(child)); > > diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > index ca03880fa7e49..5fd49ee47096d 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > @@ -2598,6 +2598,45 @@ static void execlists_context_cancel_request(struct > > intel_context *ce, > > current->comm); > > } > > +static struct intel_context * > > +execlists_create_parallel(struct intel_engine_cs **engines, > > + unsigned int num_siblings, > > + unsigned int width) > > +{ > > + struct intel_context *parent = NULL, *ce, *err; > > + int i; > > + > > + GEM_BUG_ON(num_siblings != 1); > > + > > + for (i = 0; i < width; ++i) { > > + ce = intel_context_create(engines[i]); > > + if (!ce) { > > + err = ERR_PTR(-ENOMEM); > intel_context_create already checks for null and returns -ENOMEM. This needs > to check for IS_ERR(ce). > Yep. > > + goto unwind; > > + } > > + > > + if (i == 0) > > + parent = ce; > > + else > > + intel_context_bind_parent_child(parent, ce); > > + } > > + > > + parent->parallel.fence_context = dma_fence_context_alloc(1); > > + > > + intel_context_set_nopreempt(p
Re: [Intel-gfx] [PATCH] drm/i915/execlists: Weak parallel submission support for execlists
On 11/11/2021 13:20, Matthew Brost wrote: A weak implementation of parallel submission (multi-bb execbuf IOCTL) for execlists. Doing as little as possible to support this interface for execlists - basically just passing submit fences between each request generated and virtual engines are not allowed. This is on par with what is there for the existing (hopefully soon deprecated) bonding interface. We perma-pin these execlists contexts to align with GuC implementation. v2: (John Harrison) - Drop siblings array as num_siblings must be 1 v3: (John Harrison) - Drop single submission Signed-off-by: Matthew Brost --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 10 +++-- drivers/gpu/drm/i915/gt/intel_context.c | 4 +- .../drm/i915/gt/intel_execlists_submission.c | 40 +++ drivers/gpu/drm/i915/gt/intel_lrc.c | 2 + .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 - 5 files changed, 50 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index ebd775cb1661c..d7bf6c8f70b7b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -570,10 +570,6 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, struct intel_engine_cs **siblings = NULL; intel_engine_mask_t prev_mask; - /* FIXME: This is NIY for execlists */ - if (!(intel_uc_uses_guc_submission(&i915->gt.uc))) - return -ENODEV; - if (get_user(slot, &ext->engine_index)) return -EFAULT; @@ -583,6 +579,12 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, if (get_user(num_siblings, &ext->num_siblings)) return -EFAULT; + if (!intel_uc_uses_guc_submission(&i915->gt.uc) && num_siblings != 1) { + drm_dbg(&i915->drm, "Only 1 sibling (%d) supported in non-GuC mode\n", + num_siblings); + return -EINVAL; + } + if (slot >= set->num_engines) { drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n", slot, set->num_engines); diff --git 
a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 5634d14052bc9..1bec92e1d8e63 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -79,7 +79,8 @@ static int intel_context_active_acquire(struct intel_context *ce) __i915_active_acquire(&ce->active); - if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine)) + if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine) || + intel_context_is_parallel(ce)) return 0; /* Preallocate tracking nodes */ @@ -563,7 +564,6 @@ void intel_context_bind_parent_child(struct intel_context *parent, * Callers responsibility to validate that this function is used * correctly but we use GEM_BUG_ON here ensure that they do. */ - GEM_BUG_ON(!intel_engine_uses_guc(parent->engine)); GEM_BUG_ON(intel_context_is_pinned(parent)); GEM_BUG_ON(intel_context_is_child(parent)); GEM_BUG_ON(intel_context_is_pinned(child)); diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index ca03880fa7e49..5fd49ee47096d 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -2598,6 +2598,45 @@ static void execlists_context_cancel_request(struct intel_context *ce, current->comm); } +static struct intel_context * +execlists_create_parallel(struct intel_engine_cs **engines, + unsigned int num_siblings, + unsigned int width) +{ + struct intel_context *parent = NULL, *ce, *err; + int i; + + GEM_BUG_ON(num_siblings != 1); + + for (i = 0; i < width; ++i) { + ce = intel_context_create(engines[i]); + if (!ce) { + err = ERR_PTR(-ENOMEM); intel_context_create already checks for null and returns -ENOMEM. This needs to check for IS_ERR(ce). 
+ goto unwind; + } + + if (i == 0) + parent = ce; + else + intel_context_bind_parent_child(parent, ce); + } + + parent->parallel.fence_context = dma_fence_context_alloc(1); + + intel_context_set_nopreempt(parent); + for_each_child(parent, ce) { + intel_context_set_nopreempt(ce); + intel_context_set_single_submission(ce); I thought the single submission thing wasn't wanted anymore? + } + + return parent; + +unwind: + if (parent) + intel_context_put(pare
Re: [Intel-gfx] [PATCH] drm/i915/execlists: Weak parallel submission support for execlists
On 12/11/2021 17:59, Matthew Brost wrote: On Fri, Nov 12, 2021 at 02:13:50PM +0000, Tvrtko Ursulin wrote: On 11/11/2021 16:49, Matthew Brost wrote: On Mon, Nov 01, 2021 at 10:35:09AM +0000, Tvrtko Ursulin wrote: On 27/10/2021 21:10, Matthew Brost wrote: On Wed, Oct 27, 2021 at 01:04:49PM -0700, John Harrison wrote: On 10/27/2021 12:17, Matthew Brost wrote: On Tue, Oct 26, 2021 at 02:58:00PM -0700, John Harrison wrote: On 10/20/2021 14:47, Matthew Brost wrote: A weak implementation of parallel submission (multi-bb execbuf IOCTL) for execlists. Doing as little as possible to support this interface for execlists - basically just passing submit fences between each request generated and virtual engines are not allowed. This is on par with what is there for the existing (hopefully soon deprecated) bonding interface. We perma-pin these execlists contexts to align with GuC implementation. v2: (John Harrison) - Drop siblings array as num_siblings must be 1 Signed-off-by: Matthew Brost --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 10 +++-- drivers/gpu/drm/i915/gt/intel_context.c | 4 +- .../drm/i915/gt/intel_execlists_submission.c | 44 ++- drivers/gpu/drm/i915/gt/intel_lrc.c | 2 + .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 - 5 files changed, 52 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index fb33d0322960..35e87a7d0ea9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -570,10 +570,6 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, struct intel_engine_cs **siblings = NULL; intel_engine_mask_t prev_mask; - /* FIXME: This is NIY for execlists */ - if (!(intel_uc_uses_guc_submission(&i915->gt.uc))) - return -ENODEV; - if (get_user(slot, &ext->engine_index)) return -EFAULT; @@ -583,6 +579,12 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, if 
(get_user(num_siblings, &ext->num_siblings)) return -EFAULT; + if (!intel_uc_uses_guc_submission(&i915->gt.uc) && num_siblings != 1) { + drm_dbg(&i915->drm, "Only 1 sibling (%d) supported in non-GuC mode\n", + num_siblings); + return -EINVAL; + } + if (slot >= set->num_engines) { drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n", slot, set->num_engines); diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 5634d14052bc..1bec92e1d8e6 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -79,7 +79,8 @@ static int intel_context_active_acquire(struct intel_context *ce) __i915_active_acquire(&ce->active); - if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine)) + if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine) || + intel_context_is_parallel(ce)) return 0; /* Preallocate tracking nodes */ @@ -563,7 +564,6 @@ void intel_context_bind_parent_child(struct intel_context *parent, * Callers responsibility to validate that this function is used * correctly but we use GEM_BUG_ON here ensure that they do. */ - GEM_BUG_ON(!intel_engine_uses_guc(parent->engine)); GEM_BUG_ON(intel_context_is_pinned(parent)); GEM_BUG_ON(intel_context_is_child(parent)); GEM_BUG_ON(intel_context_is_pinned(child)); diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index bedb80057046..2865b422300d 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -927,8 +927,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) static bool ctx_single_port_submission(const struct intel_context *ce) { - return (IS_ENABLED(CONFIG_DRM_I915_GVT) && - intel_context_force_single_submission(ce)); + return intel_context_force_single_submission(ce); I think this is actually going to break GVT. 
Not so much this change here but the whole use of single submission outside of GVT. It looks like the GVT driver overloads the single submission flag to tag requests that it owns. If we start using that flag elsewhere when GVT is active, I think that will cause much confusion within the GVT code. The correct fix would be to create a new flag just for GVT usage alongside the single submission one. GVT would then set both but only check for its own private flag. The parallel code would obviously only set the existing single submission flag. Ok, see below. } stati
Re: [Intel-gfx] [PATCH] drm/i915/execlists: Weak parallel submission support for execlists
On Fri, Nov 12, 2021 at 02:13:50PM +0000, Tvrtko Ursulin wrote: > > On 11/11/2021 16:49, Matthew Brost wrote: > > On Mon, Nov 01, 2021 at 10:35:09AM +0000, Tvrtko Ursulin wrote: > > > > > > On 27/10/2021 21:10, Matthew Brost wrote: > > > > On Wed, Oct 27, 2021 at 01:04:49PM -0700, John Harrison wrote: > > > > > On 10/27/2021 12:17, Matthew Brost wrote: > > > > > > On Tue, Oct 26, 2021 at 02:58:00PM -0700, John Harrison wrote: > > > > > > > On 10/20/2021 14:47, Matthew Brost wrote: > > > > > > > > A weak implementation of parallel submission (multi-bb execbuf > > > > > > > > IOCTL) for > > > > > > > > execlists. Doing as little as possible to support this > > > > > > > > interface for > > > > > > > > execlists - basically just passing submit fences between each > > > > > > > > request > > > > > > > > generated and virtual engines are not allowed. This is on par > > > > > > > > with what > > > > > > > > is there for the existing (hopefully soon deprecated) bonding > > > > > > > > interface. > > > > > > > > > > > > > > > > We perma-pin these execlists contexts to align with GuC > > > > > > > > implementation. 
> > > > > > > > > > > > > > > > v2: > > > > > > > > (John Harrison) > > > > > > > > - Drop siblings array as num_siblings must be 1 > > > > > > > > > > > > > > > > Signed-off-by: Matthew Brost > > > > > > > > --- > > > > > > > > drivers/gpu/drm/i915/gem/i915_gem_context.c | 10 +++-- > > > > > > > > drivers/gpu/drm/i915/gt/intel_context.c | 4 +- > > > > > > > > .../drm/i915/gt/intel_execlists_submission.c | 44 > > > > > > > > ++- > > > > > > > > drivers/gpu/drm/i915/gt/intel_lrc.c | 2 + > > > > > > > > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 - > > > > > > > > 5 files changed, 52 insertions(+), 10 deletions(-) > > > > > > > > > > > > > > > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c > > > > > > > > b/drivers/gpu/drm/i915/gem/i915_gem_context.c > > > > > > > > index fb33d0322960..35e87a7d0ea9 100644 > > > > > > > > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c > > > > > > > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c > > > > > > > > @@ -570,10 +570,6 @@ > > > > > > > > set_proto_ctx_engines_parallel_submit(struct > > > > > > > > i915_user_extension __user *base, > > > > > > > > struct intel_engine_cs **siblings = NULL; > > > > > > > > intel_engine_mask_t prev_mask; > > > > > > > > - /* FIXME: This is NIY for execlists */ > > > > > > > > - if (!(intel_uc_uses_guc_submission(&i915->gt.uc))) > > > > > > > > - return -ENODEV; > > > > > > > > - > > > > > > > > if (get_user(slot, &ext->engine_index)) > > > > > > > > return -EFAULT; > > > > > > > > @@ -583,6 +579,12 @@ > > > > > > > > set_proto_ctx_engines_parallel_submit(struct > > > > > > > > i915_user_extension __user *base, > > > > > > > > if (get_user(num_siblings, &ext->num_siblings)) > > > > > > > > return -EFAULT; > > > > > > > > + if (!intel_uc_uses_guc_submission(&i915->gt.uc) && > > > > > > > > num_siblings != 1) { > > > > > > > > + drm_dbg(&i915->drm, "Only 1 sibling (%d) > > > > > > > > supported in non-GuC mode\n", > > > > > > > > + num_siblings); > > > > > > > > + return 
-EINVAL; > > > > > > > > + } > > > > > > > > + > > > > > > > > if (slot >= set->num_engines) { > > > > > > > > drm_dbg(&i915->drm, "Invalid placement value, > > > > > > > > %d >= %d\n", > > > > > > > > slot, set->num_engines); > > > > > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_context.c > > > > > > > > b/drivers/gpu/drm/i915/gt/intel_context.c > > > > > > > > index 5634d14052bc..1bec92e1d8e6 100644 > > > > > > > > --- a/drivers/gpu/drm/i915/gt/intel_context.c > > > > > > > > +++ b/drivers/gpu/drm/i915/gt/intel_context.c > > > > > > > > @@ -79,7 +79,8 @@ static int > > > > > > > > intel_context_active_acquire(struct intel_context *ce) > > > > > > > > __i915_active_acquire(&ce->active); > > > > > > > > - if (intel_context_is_barrier(ce) || > > > > > > > > intel_engine_uses_guc(ce->engine)) > > > > > > > > + if (intel_context_is_barrier(ce) || > > > > > > > > intel_engine_uses_guc(ce->engine) || > > > > > > > > + intel_context_is_parallel(ce)) > > > > > > > > return 0; > > > > > > > > /* Preallocate tracking nodes */ > > > > > > > > @@ -563,7 +564,6 @@ void intel_context_bind_parent_child(struct > > > > > > > > intel_context *parent, > > > > > > > > * Callers responsibility to validate that this > > > > > > > > function is used > > > > > > > > * correctly but we use GEM_BUG_ON here ensure that > > > > > > > > they do. > > > > > > > > */ > > > > > > > > - GEM_BUG_ON(!intel_engine_uses_guc(parent->engine)); > > > > > > > > GEM_BUG_ON(intel_context_
Re: [Intel-gfx] [PATCH] drm/i915/execlists: Weak parallel submission support for execlists
On 11/11/2021 16:49, Matthew Brost wrote: On Mon, Nov 01, 2021 at 10:35:09AM +0000, Tvrtko Ursulin wrote: On 27/10/2021 21:10, Matthew Brost wrote: On Wed, Oct 27, 2021 at 01:04:49PM -0700, John Harrison wrote: On 10/27/2021 12:17, Matthew Brost wrote: On Tue, Oct 26, 2021 at 02:58:00PM -0700, John Harrison wrote: On 10/20/2021 14:47, Matthew Brost wrote: A weak implementation of parallel submission (multi-bb execbuf IOCTL) for execlists. Doing as little as possible to support this interface for execlists - basically just passing submit fences between each request generated and virtual engines are not allowed. This is on par with what is there for the existing (hopefully soon deprecated) bonding interface. We perma-pin these execlists contexts to align with GuC implementation. v2: (John Harrison) - Drop siblings array as num_siblings must be 1 Signed-off-by: Matthew Brost --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 10 +++-- drivers/gpu/drm/i915/gt/intel_context.c | 4 +- .../drm/i915/gt/intel_execlists_submission.c | 44 ++- drivers/gpu/drm/i915/gt/intel_lrc.c | 2 + .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 - 5 files changed, 52 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index fb33d0322960..35e87a7d0ea9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -570,10 +570,6 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, struct intel_engine_cs **siblings = NULL; intel_engine_mask_t prev_mask; - /* FIXME: This is NIY for execlists */ - if (!(intel_uc_uses_guc_submission(&i915->gt.uc))) - return -ENODEV; - if (get_user(slot, &ext->engine_index)) return -EFAULT; @@ -583,6 +579,12 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, if (get_user(num_siblings, &ext->num_siblings)) return -EFAULT; + if (!intel_uc_uses_guc_submission(&i915->gt.uc) && 
num_siblings != 1) { + drm_dbg(&i915->drm, "Only 1 sibling (%d) supported in non-GuC mode\n", + num_siblings); + return -EINVAL; + } + if (slot >= set->num_engines) { drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n", slot, set->num_engines); diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 5634d14052bc..1bec92e1d8e6 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -79,7 +79,8 @@ static int intel_context_active_acquire(struct intel_context *ce) __i915_active_acquire(&ce->active); - if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine)) + if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine) || + intel_context_is_parallel(ce)) return 0; /* Preallocate tracking nodes */ @@ -563,7 +564,6 @@ void intel_context_bind_parent_child(struct intel_context *parent, * Callers responsibility to validate that this function is used * correctly but we use GEM_BUG_ON here ensure that they do. */ - GEM_BUG_ON(!intel_engine_uses_guc(parent->engine)); GEM_BUG_ON(intel_context_is_pinned(parent)); GEM_BUG_ON(intel_context_is_child(parent)); GEM_BUG_ON(intel_context_is_pinned(child)); diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index bedb80057046..2865b422300d 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -927,8 +927,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) static bool ctx_single_port_submission(const struct intel_context *ce) { - return (IS_ENABLED(CONFIG_DRM_I915_GVT) && - intel_context_force_single_submission(ce)); + return intel_context_force_single_submission(ce); I think this is actually going to break GVT. Not so much this change here but the whole use of single submission outside of GVT. 
It looks like the GVT driver overloads the single submission flag to tag requests that it owns. If we start using that flag elsewhere when GVT is active, I think that will cause much confusion within the GVT code. The correct fix would be to create a new flag just for GVT usage alongside the single submission one. GVT would then set both but only check for its own private flag. The parallel code would obviously only set the existing single submission flag. Ok, see below. } static bool can_merge_ctx(const struct intel_context *prev, @@ -2598,6 +2597,46 @@ static void execlists_context_cancel_reques
Re: [Intel-gfx] [PATCH] drm/i915/execlists: Weak parallel submission support for execlists
On Mon, Nov 01, 2021 at 10:35:09AM +0000, Tvrtko Ursulin wrote: > > On 27/10/2021 21:10, Matthew Brost wrote: > > On Wed, Oct 27, 2021 at 01:04:49PM -0700, John Harrison wrote: > > > On 10/27/2021 12:17, Matthew Brost wrote: > > > > On Tue, Oct 26, 2021 at 02:58:00PM -0700, John Harrison wrote: > > > > > On 10/20/2021 14:47, Matthew Brost wrote: > > > > > > A weak implementation of parallel submission (multi-bb execbuf > > > > > > IOCTL) for > > > > > > execlists. Doing as little as possible to support this interface for > > > > > > execlists - basically just passing submit fences between each > > > > > > request > > > > > > generated and virtual engines are not allowed. This is on par with > > > > > > what > > > > > > is there for the existing (hopefully soon deprecated) bonding > > > > > > interface. > > > > > > > > > > > > We perma-pin these execlists contexts to align with GuC > > > > > > implementation. > > > > > > > > > > > > v2: > > > > > > (John Harrison) > > > > > > - Drop siblings array as num_siblings must be 1 > > > > > > > > > > > > Signed-off-by: Matthew Brost > > > > > > --- > > > > > > drivers/gpu/drm/i915/gem/i915_gem_context.c | 10 +++-- > > > > > > drivers/gpu/drm/i915/gt/intel_context.c | 4 +- > > > > > > .../drm/i915/gt/intel_execlists_submission.c | 44 > > > > > > ++- > > > > > > drivers/gpu/drm/i915/gt/intel_lrc.c | 2 + > > > > > > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 - > > > > > > 5 files changed, 52 insertions(+), 10 deletions(-) > > > > > > > > > > > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c > > > > > > b/drivers/gpu/drm/i915/gem/i915_gem_context.c > > > > > > index fb33d0322960..35e87a7d0ea9 100644 > > > > > > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c > > > > > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c > > > > > > @@ -570,10 +570,6 @@ set_proto_ctx_engines_parallel_submit(struct > > > > > > i915_user_extension __user *base, > > > > > > struct intel_engine_cs **siblings = NULL; > > > > > > 
intel_engine_mask_t prev_mask; > > > > > > - /* FIXME: This is NIY for execlists */ > > > > > > - if (!(intel_uc_uses_guc_submission(&i915->gt.uc))) > > > > > > - return -ENODEV; > > > > > > - > > > > > > if (get_user(slot, &ext->engine_index)) > > > > > > return -EFAULT; > > > > > > @@ -583,6 +579,12 @@ set_proto_ctx_engines_parallel_submit(struct > > > > > > i915_user_extension __user *base, > > > > > > if (get_user(num_siblings, &ext->num_siblings)) > > > > > > return -EFAULT; > > > > > > + if (!intel_uc_uses_guc_submission(&i915->gt.uc) && num_siblings > > > > > > != 1) { > > > > > > + drm_dbg(&i915->drm, "Only 1 sibling (%d) supported in > > > > > > non-GuC mode\n", > > > > > > + num_siblings); > > > > > > + return -EINVAL; > > > > > > + } > > > > > > + > > > > > > if (slot >= set->num_engines) { > > > > > > drm_dbg(&i915->drm, "Invalid placement value, > > > > > > %d >= %d\n", > > > > > > slot, set->num_engines); > > > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_context.c > > > > > > b/drivers/gpu/drm/i915/gt/intel_context.c > > > > > > index 5634d14052bc..1bec92e1d8e6 100644 > > > > > > --- a/drivers/gpu/drm/i915/gt/intel_context.c > > > > > > +++ b/drivers/gpu/drm/i915/gt/intel_context.c > > > > > > @@ -79,7 +79,8 @@ static int intel_context_active_acquire(struct > > > > > > intel_context *ce) > > > > > > __i915_active_acquire(&ce->active); > > > > > > - if (intel_context_is_barrier(ce) || > > > > > > intel_engine_uses_guc(ce->engine)) > > > > > > + if (intel_context_is_barrier(ce) || > > > > > > intel_engine_uses_guc(ce->engine) || > > > > > > + intel_context_is_parallel(ce)) > > > > > > return 0; > > > > > > /* Preallocate tracking nodes */ > > > > > > @@ -563,7 +564,6 @@ void intel_context_bind_parent_child(struct > > > > > > intel_context *parent, > > > > > > * Callers responsibility to validate that this > > > > > > function is used > > > > > > * correctly but we use GEM_BUG_ON here ensure that > > > > > > they do. 
> > > > > > */ > > > > > > - GEM_BUG_ON(!intel_engine_uses_guc(parent->engine)); > > > > > > GEM_BUG_ON(intel_context_is_pinned(parent)); > > > > > > GEM_BUG_ON(intel_context_is_child(parent)); > > > > > > GEM_BUG_ON(intel_context_is_pinned(child)); > > > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > > > > > b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > > > > > index bedb80057046..2865b422300d 100644 > > > > > > --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > > > > > +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > > > > > @@
Re: [Intel-gfx] [PATCH] drm/i915/execlists: Weak parallel submission support for execlists
On 27/10/2021 21:10, Matthew Brost wrote: On Wed, Oct 27, 2021 at 01:04:49PM -0700, John Harrison wrote: On 10/27/2021 12:17, Matthew Brost wrote: On Tue, Oct 26, 2021 at 02:58:00PM -0700, John Harrison wrote: On 10/20/2021 14:47, Matthew Brost wrote: A weak implementation of parallel submission (multi-bb execbuf IOCTL) for execlists. Doing as little as possible to support this interface for execlists - basically just passing submit fences between each request generated and virtual engines are not allowed. This is on par with what is there for the existing (hopefully soon deprecated) bonding interface. We perma-pin these execlists contexts to align with GuC implementation. v2: (John Harrison) - Drop siblings array as num_siblings must be 1 Signed-off-by: Matthew Brost --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 10 +++-- drivers/gpu/drm/i915/gt/intel_context.c | 4 +- .../drm/i915/gt/intel_execlists_submission.c | 44 ++- drivers/gpu/drm/i915/gt/intel_lrc.c | 2 + .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 - 5 files changed, 52 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index fb33d0322960..35e87a7d0ea9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -570,10 +570,6 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, struct intel_engine_cs **siblings = NULL; intel_engine_mask_t prev_mask; - /* FIXME: This is NIY for execlists */ - if (!(intel_uc_uses_guc_submission(&i915->gt.uc))) - return -ENODEV; - if (get_user(slot, &ext->engine_index)) return -EFAULT; @@ -583,6 +579,12 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, if (get_user(num_siblings, &ext->num_siblings)) return -EFAULT; + if (!intel_uc_uses_guc_submission(&i915->gt.uc) && num_siblings != 1) { + drm_dbg(&i915->drm, "Only 1 sibling (%d) supported in non-GuC mode\n", + num_siblings); + 
return -EINVAL; + } + if (slot >= set->num_engines) { drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n", slot, set->num_engines); diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 5634d14052bc..1bec92e1d8e6 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -79,7 +79,8 @@ static int intel_context_active_acquire(struct intel_context *ce) __i915_active_acquire(&ce->active); - if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine)) + if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine) || + intel_context_is_parallel(ce)) return 0; /* Preallocate tracking nodes */ @@ -563,7 +564,6 @@ void intel_context_bind_parent_child(struct intel_context *parent, * Callers responsibility to validate that this function is used * correctly but we use GEM_BUG_ON here ensure that they do. */ - GEM_BUG_ON(!intel_engine_uses_guc(parent->engine)); GEM_BUG_ON(intel_context_is_pinned(parent)); GEM_BUG_ON(intel_context_is_child(parent)); GEM_BUG_ON(intel_context_is_pinned(child)); diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index bedb80057046..2865b422300d 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -927,8 +927,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) static bool ctx_single_port_submission(const struct intel_context *ce) { - return (IS_ENABLED(CONFIG_DRM_I915_GVT) && - intel_context_force_single_submission(ce)); + return intel_context_force_single_submission(ce); I think this is actually going to break GVT. Not so much this change here but the whole use of single submission outside of GVT. It looks like the GVT driver overloads the single submission flag to tag requests that it owns. 
If we start using that flag elsewhere when GVT is active, I think that will cause much confusion within the GVT code. The correct fix would be to create a new flag just for GVT usage alongside the single submission one. GVT would then set both but only check for its own private flag. The parallel code would obviously only set the existing single submission flag. Ok, see below. } static bool can_merge_ctx(const struct intel_context *prev, @@ -2598,6 +2597,46 @@ static void execlists_context_cancel_request(struct intel_context *ce, current->comm); } +static struct intel_context * +
Re: [Intel-gfx] [PATCH] drm/i915/execlists: Weak parallel submission support for execlists
On Wed, Oct 27, 2021 at 01:04:49PM -0700, John Harrison wrote: > On 10/27/2021 12:17, Matthew Brost wrote: > > On Tue, Oct 26, 2021 at 02:58:00PM -0700, John Harrison wrote: > > > On 10/20/2021 14:47, Matthew Brost wrote: > > > > A weak implementation of parallel submission (multi-bb execbuf IOCTL) > > > > for > > > > execlists. Doing as little as possible to support this interface for > > > > execlists - basically just passing submit fences between each request > > > > generated and virtual engines are not allowed. This is on par with what > > > > is there for the existing (hopefully soon deprecated) bonding interface. > > > > > > > > We perma-pin these execlists contexts to align with GuC implementation. > > > > > > > > v2: > > > >(John Harrison) > > > > - Drop siblings array as num_siblings must be 1 > > > > > > > > Signed-off-by: Matthew Brost > > > > --- > > > >drivers/gpu/drm/i915/gem/i915_gem_context.c | 10 +++-- > > > >drivers/gpu/drm/i915/gt/intel_context.c | 4 +- > > > >.../drm/i915/gt/intel_execlists_submission.c | 44 > > > > ++- > > > >drivers/gpu/drm/i915/gt/intel_lrc.c | 2 + > > > >.../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 - > > > >5 files changed, 52 insertions(+), 10 deletions(-) > > > > > > > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c > > > > b/drivers/gpu/drm/i915/gem/i915_gem_context.c > > > > index fb33d0322960..35e87a7d0ea9 100644 > > > > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c > > > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c > > > > @@ -570,10 +570,6 @@ set_proto_ctx_engines_parallel_submit(struct > > > > i915_user_extension __user *base, > > > > struct intel_engine_cs **siblings = NULL; > > > > intel_engine_mask_t prev_mask; > > > > - /* FIXME: This is NIY for execlists */ > > > > - if (!(intel_uc_uses_guc_submission(&i915->gt.uc))) > > > > - return -ENODEV; > > > > - > > > > if (get_user(slot, &ext->engine_index)) > > > > return -EFAULT; > > > > @@ -583,6 +579,12 @@ 
set_proto_ctx_engines_parallel_submit(struct > > > > i915_user_extension __user *base, > > > > if (get_user(num_siblings, &ext->num_siblings)) > > > > return -EFAULT; > > > > + if (!intel_uc_uses_guc_submission(&i915->gt.uc) && num_siblings > > > > != 1) { > > > > + drm_dbg(&i915->drm, "Only 1 sibling (%d) supported in > > > > non-GuC mode\n", > > > > + num_siblings); > > > > + return -EINVAL; > > > > + } > > > > + > > > > if (slot >= set->num_engines) { > > > > drm_dbg(&i915->drm, "Invalid placement value, %d >= > > > > %d\n", > > > > slot, set->num_engines); > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_context.c > > > > b/drivers/gpu/drm/i915/gt/intel_context.c > > > > index 5634d14052bc..1bec92e1d8e6 100644 > > > > --- a/drivers/gpu/drm/i915/gt/intel_context.c > > > > +++ b/drivers/gpu/drm/i915/gt/intel_context.c > > > > @@ -79,7 +79,8 @@ static int intel_context_active_acquire(struct > > > > intel_context *ce) > > > > __i915_active_acquire(&ce->active); > > > > - if (intel_context_is_barrier(ce) || > > > > intel_engine_uses_guc(ce->engine)) > > > > + if (intel_context_is_barrier(ce) || > > > > intel_engine_uses_guc(ce->engine) || > > > > + intel_context_is_parallel(ce)) > > > > return 0; > > > > /* Preallocate tracking nodes */ > > > > @@ -563,7 +564,6 @@ void intel_context_bind_parent_child(struct > > > > intel_context *parent, > > > > * Callers responsibility to validate that this function is used > > > > * correctly but we use GEM_BUG_ON here ensure that they do. 
> > > > */ > > > > - GEM_BUG_ON(!intel_engine_uses_guc(parent->engine)); > > > > GEM_BUG_ON(intel_context_is_pinned(parent)); > > > > GEM_BUG_ON(intel_context_is_child(parent)); > > > > GEM_BUG_ON(intel_context_is_pinned(child)); > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > > > b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > > > index bedb80057046..2865b422300d 100644 > > > > --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > > > +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > > > @@ -927,8 +927,7 @@ static void execlists_submit_ports(struct > > > > intel_engine_cs *engine) > > > >static bool ctx_single_port_submission(const struct intel_context > > > > *ce) > > > >{ > > > > - return (IS_ENABLED(CONFIG_DRM_I915_GVT) && > > > > - intel_context_force_single_submission(ce)); > > > > + return intel_context_force_single_submission(ce); > > > I think this is actually going to break GVT. > > > > > > Not so much this change here but the whole use of single submission > > > outside > > > of GVT. It looks like the GVT
Re: [Intel-gfx] [PATCH] drm/i915/execlists: Weak parallel submission support for execlists
On 10/27/2021 12:17, Matthew Brost wrote: On Tue, Oct 26, 2021 at 02:58:00PM -0700, John Harrison wrote: On 10/20/2021 14:47, Matthew Brost wrote: A weak implementation of parallel submission (multi-bb execbuf IOCTL) for execlists. Doing as little as possible to support this interface for execlists - basically just passing submit fences between each request generated and virtual engines are not allowed. This is on par with what is there for the existing (hopefully soon deprecated) bonding interface. We perma-pin these execlists contexts to align with GuC implementation. v2: (John Harrison) - Drop siblings array as num_siblings must be 1 Signed-off-by: Matthew Brost --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 10 +++-- drivers/gpu/drm/i915/gt/intel_context.c | 4 +- .../drm/i915/gt/intel_execlists_submission.c | 44 ++- drivers/gpu/drm/i915/gt/intel_lrc.c | 2 + .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 - 5 files changed, 52 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index fb33d0322960..35e87a7d0ea9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -570,10 +570,6 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, struct intel_engine_cs **siblings = NULL; intel_engine_mask_t prev_mask; - /* FIXME: This is NIY for execlists */ - if (!(intel_uc_uses_guc_submission(&i915->gt.uc))) - return -ENODEV; - if (get_user(slot, &ext->engine_index)) return -EFAULT; @@ -583,6 +579,12 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, if (get_user(num_siblings, &ext->num_siblings)) return -EFAULT; + if (!intel_uc_uses_guc_submission(&i915->gt.uc) && num_siblings != 1) { + drm_dbg(&i915->drm, "Only 1 sibling (%d) supported in non-GuC mode\n", + num_siblings); + return -EINVAL; + } + if (slot >= set->num_engines) { drm_dbg(&i915->drm, "Invalid placement value, %d 
>= %d\n", slot, set->num_engines); diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 5634d14052bc..1bec92e1d8e6 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -79,7 +79,8 @@ static int intel_context_active_acquire(struct intel_context *ce) __i915_active_acquire(&ce->active); - if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine)) + if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine) || + intel_context_is_parallel(ce)) return 0; /* Preallocate tracking nodes */ @@ -563,7 +564,6 @@ void intel_context_bind_parent_child(struct intel_context *parent, * Callers responsibility to validate that this function is used * correctly but we use GEM_BUG_ON here ensure that they do. */ - GEM_BUG_ON(!intel_engine_uses_guc(parent->engine)); GEM_BUG_ON(intel_context_is_pinned(parent)); GEM_BUG_ON(intel_context_is_child(parent)); GEM_BUG_ON(intel_context_is_pinned(child)); diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index bedb80057046..2865b422300d 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -927,8 +927,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) static bool ctx_single_port_submission(const struct intel_context *ce) { - return (IS_ENABLED(CONFIG_DRM_I915_GVT) && - intel_context_force_single_submission(ce)); + return intel_context_force_single_submission(ce); I think this is actually going to break GVT. Not so much this change here but the whole use of single submission outside of GVT. It looks like the GVT driver overloads the single submission flag to tag requests that it owns. If we start using that flag elsewhere when GVT is active, I think that will cause much confusion within the GVT code. 
The correct fix would be to create a new flag just for GVT usage alongside the single submission one. GVT would then set both but only check for its own private flag. The parallel code would obviously only set the existing single submission flag. Ok, see below. } static bool can_merge_ctx(const struct intel_context *prev, @@ -2598,6 +2597,46 @@ static void execlists_context_cancel_request(struct intel_context *ce, current->comm); } +static struct intel_context * +execlists_create_parallel(struct intel_engine_cs **engines, + unsigned int num_siblings, +
Re: [Intel-gfx] [PATCH] drm/i915/execlists: Weak parallel submission support for execlists
On Tue, Oct 26, 2021 at 02:58:00PM -0700, John Harrison wrote: > On 10/20/2021 14:47, Matthew Brost wrote: > > A weak implementation of parallel submission (multi-bb execbuf IOCTL) for > > execlists. Doing as little as possible to support this interface for > > execlists - basically just passing submit fences between each request > > generated and virtual engines are not allowed. This is on par with what > > is there for the existing (hopefully soon deprecated) bonding interface. > > > > We perma-pin these execlists contexts to align with GuC implementation. > > > > v2: > > (John Harrison) > >- Drop siblings array as num_siblings must be 1 > > > > Signed-off-by: Matthew Brost > > --- > > drivers/gpu/drm/i915/gem/i915_gem_context.c | 10 +++-- > > drivers/gpu/drm/i915/gt/intel_context.c | 4 +- > > .../drm/i915/gt/intel_execlists_submission.c | 44 ++- > > drivers/gpu/drm/i915/gt/intel_lrc.c | 2 + > > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 - > > 5 files changed, 52 insertions(+), 10 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c > > b/drivers/gpu/drm/i915/gem/i915_gem_context.c > > index fb33d0322960..35e87a7d0ea9 100644 > > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c > > @@ -570,10 +570,6 @@ set_proto_ctx_engines_parallel_submit(struct > > i915_user_extension __user *base, > > struct intel_engine_cs **siblings = NULL; > > intel_engine_mask_t prev_mask; > > - /* FIXME: This is NIY for execlists */ > > - if (!(intel_uc_uses_guc_submission(&i915->gt.uc))) > > - return -ENODEV; > > - > > if (get_user(slot, &ext->engine_index)) > > return -EFAULT; > > @@ -583,6 +579,12 @@ set_proto_ctx_engines_parallel_submit(struct > > i915_user_extension __user *base, > > if (get_user(num_siblings, &ext->num_siblings)) > > return -EFAULT; > > + if (!intel_uc_uses_guc_submission(&i915->gt.uc) && num_siblings != 1) { > > + drm_dbg(&i915->drm, "Only 1 sibling (%d) supported in non-GuC > > 
mode\n", > > + num_siblings); > > + return -EINVAL; > > + } > > + > > if (slot >= set->num_engines) { > > drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n", > > slot, set->num_engines); > > diff --git a/drivers/gpu/drm/i915/gt/intel_context.c > > b/drivers/gpu/drm/i915/gt/intel_context.c > > index 5634d14052bc..1bec92e1d8e6 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_context.c > > +++ b/drivers/gpu/drm/i915/gt/intel_context.c > > @@ -79,7 +79,8 @@ static int intel_context_active_acquire(struct > > intel_context *ce) > > __i915_active_acquire(&ce->active); > > - if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine)) > > + if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine) || > > + intel_context_is_parallel(ce)) > > return 0; > > /* Preallocate tracking nodes */ > > @@ -563,7 +564,6 @@ void intel_context_bind_parent_child(struct > > intel_context *parent, > > * Callers responsibility to validate that this function is used > > * correctly but we use GEM_BUG_ON here ensure that they do. > > */ > > - GEM_BUG_ON(!intel_engine_uses_guc(parent->engine)); > > GEM_BUG_ON(intel_context_is_pinned(parent)); > > GEM_BUG_ON(intel_context_is_child(parent)); > > GEM_BUG_ON(intel_context_is_pinned(child)); > > diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > index bedb80057046..2865b422300d 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > @@ -927,8 +927,7 @@ static void execlists_submit_ports(struct > > intel_engine_cs *engine) > > static bool ctx_single_port_submission(const struct intel_context *ce) > > { > > - return (IS_ENABLED(CONFIG_DRM_I915_GVT) && > > - intel_context_force_single_submission(ce)); > > + return intel_context_force_single_submission(ce); > I think this is actually going to break GVT. 
> > Not so much this change here but the whole use of single submission outside > of GVT. It looks like the GVT driver overloads the single submission flag to > tag requests that it owns. If we start using that flag elsewhere when GVT is > active, I think that will cause much confusion within the GVT code. > > The correct fix would be to create a new flag just for GVT usage alongside > the single submission one. GVT would then set both but only check for its > own private flag. The parallel code would obviously only set the existing > single submission flag. > Ok, see below. > > > } > > static bool can_merge_ctx(const struct intel_context *prev, > > @@ -2598,6 +2597,46 @@ static void execlists_context_cancel_request(struct > > intel_context *ce, > >
Re: [Intel-gfx] [PATCH] drm/i915/execlists: Weak parallel submission support for execlists
On 10/20/2021 14:47, Matthew Brost wrote: A weak implementation of parallel submission (multi-bb execbuf IOCTL) for execlists. Doing as little as possible to support this interface for execlists - basically just passing submit fences between each request generated and virtual engines are not allowed. This is on par with what is there for the existing (hopefully soon deprecated) bonding interface. We perma-pin these execlists contexts to align with GuC implementation. v2: (John Harrison) - Drop siblings array as num_siblings must be 1 Signed-off-by: Matthew Brost --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 10 +++-- drivers/gpu/drm/i915/gt/intel_context.c | 4 +- .../drm/i915/gt/intel_execlists_submission.c | 44 ++- drivers/gpu/drm/i915/gt/intel_lrc.c | 2 + .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 - 5 files changed, 52 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index fb33d0322960..35e87a7d0ea9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -570,10 +570,6 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, struct intel_engine_cs **siblings = NULL; intel_engine_mask_t prev_mask; - /* FIXME: This is NIY for execlists */ - if (!(intel_uc_uses_guc_submission(&i915->gt.uc))) - return -ENODEV; - if (get_user(slot, &ext->engine_index)) return -EFAULT; @@ -583,6 +579,12 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, if (get_user(num_siblings, &ext->num_siblings)) return -EFAULT; + if (!intel_uc_uses_guc_submission(&i915->gt.uc) && num_siblings != 1) { + drm_dbg(&i915->drm, "Only 1 sibling (%d) supported in non-GuC mode\n", + num_siblings); + return -EINVAL; + } + if (slot >= set->num_engines) { drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n", slot, set->num_engines); diff --git a/drivers/gpu/drm/i915/gt/intel_context.c 
b/drivers/gpu/drm/i915/gt/intel_context.c index 5634d14052bc..1bec92e1d8e6 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -79,7 +79,8 @@ static int intel_context_active_acquire(struct intel_context *ce) __i915_active_acquire(&ce->active); - if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine)) + if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine) || + intel_context_is_parallel(ce)) return 0; /* Preallocate tracking nodes */ @@ -563,7 +564,6 @@ void intel_context_bind_parent_child(struct intel_context *parent, * Callers responsibility to validate that this function is used * correctly but we use GEM_BUG_ON here ensure that they do. */ - GEM_BUG_ON(!intel_engine_uses_guc(parent->engine)); GEM_BUG_ON(intel_context_is_pinned(parent)); GEM_BUG_ON(intel_context_is_child(parent)); GEM_BUG_ON(intel_context_is_pinned(child)); diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index bedb80057046..2865b422300d 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -927,8 +927,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) static bool ctx_single_port_submission(const struct intel_context *ce) { - return (IS_ENABLED(CONFIG_DRM_I915_GVT) && - intel_context_force_single_submission(ce)); + return intel_context_force_single_submission(ce); I think this is actually going to break GVT. Not so much this change here but the whole use of single submission outside of GVT. It looks like the GVT driver overloads the single submission flag to tag requests that it owns. If we start using that flag elsewhere when GVT is active, I think that will cause much confusion within the GVT code. The correct fix would be to create a new flag just for GVT usage alongside the single submission one. 
GVT would then set both but only check for its own private flag. The parallel code would obviously only set the existing single submission flag. } static bool can_merge_ctx(const struct intel_context *prev, @@ -2598,6 +2597,46 @@ static void execlists_context_cancel_request(struct intel_context *ce, current->comm); } +static struct intel_context * +execlists_create_parallel(struct intel_engine_cs **engines, + unsigned int num_siblings, + unsigned int width) +{ + struct intel_context *parent = NULL, *ce, *err; + int i; + +