[Intel-gfx] [PATCH 1/5] drm/i915: Revert "drm/i915/gem: Asynchronous cmdparser"

2021-07-14 Thread Jason Ekstrand
This reverts 686c7c35abc2 ("drm/i915/gem: Asynchronous cmdparser").  The
justification for this commit in the git history was a vague comment
about getting it out from under the struct_mutex.  While this may
improve perf for some workloads on Gen7 platforms where we rely on the
command parser for features such as indirect rendering, no numbers were
provided to prove such an improvement.  It claims to have closed two
gitlab/bugzilla issues but with no explanation whatsoever as to why or
what bug it's fixing.

Meanwhile, by moving command parsing off to an async callback, it leaves
us with a problem of what to do on error.  When things were synchronous,
EXECBUFFER2 would fail with an error code if parsing failed.  When
moving it to async, we needed another way to handle that error and the
solution employed was to set an error on the dma_fence and then trust
that said error gets propagated to the client eventually.  Moving back
to synchronous will help us untangle the fence error propagation mess.

This also reverts most of 0edbb9ba1bfe ("drm/i915: Move cmd parser
pinning to execbuffer") which is a refactor of some of our allocation
paths for asynchronous parsing.  Now that everything is synchronous, we
don't need it.

v2 (Daniel Vetter):
 - Add stable Cc and Fixes tag

Signed-off-by: Jason Ekstrand 
Cc: <stable@vger.kernel.org> # v5.6+
Fixes: 9e31c1fe45d5 ("drm/i915: Propagate errors on awaiting already signaled fences")
Cc: Maarten Lankhorst 
Reviewed-by: Jon Bloomfield 
Acked-by: Daniel Vetter 
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 227 +-
 .../i915/gem/selftests/i915_gem_execbuffer.c  |   4 +
 drivers/gpu/drm/i915/i915_cmd_parser.c| 132 +-
 drivers/gpu/drm/i915/i915_drv.h   |   7 +-
 4 files changed, 91 insertions(+), 279 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 5ea8b4e23e428..1ed7475de454d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -25,10 +25,8 @@
 #include "i915_gem_clflush.h"
 #include "i915_gem_context.h"
 #include "i915_gem_ioctls.h"
-#include "i915_sw_fence_work.h"
 #include "i915_trace.h"
 #include "i915_user_extensions.h"
-#include "i915_memcpy.h"
 
 struct eb_vma {
struct i915_vma *vma;
@@ -1471,6 +1469,10 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
int err;
struct intel_engine_cs *engine = eb->engine;
 
+   /* If we need to copy for the cmdparser, we will stall anyway */
+   if (eb_use_cmdparser(eb))
+   return ERR_PTR(-EWOULDBLOCK);
+
if (!reloc_can_use_engine(engine)) {
engine = engine->gt->engine_class[COPY_ENGINE_CLASS][0];
if (!engine)
@@ -2385,217 +2387,6 @@ shadow_batch_pin(struct i915_execbuffer *eb,
return vma;
 }
 
-struct eb_parse_work {
-   struct dma_fence_work base;
-   struct intel_engine_cs *engine;
-   struct i915_vma *batch;
-   struct i915_vma *shadow;
-   struct i915_vma *trampoline;
-   unsigned long batch_offset;
-   unsigned long batch_length;
-   unsigned long *jump_whitelist;
-   const void *batch_map;
-   void *shadow_map;
-};
-
-static int __eb_parse(struct dma_fence_work *work)
-{
-   struct eb_parse_work *pw = container_of(work, typeof(*pw), base);
-   int ret;
-   bool cookie;
-
-   cookie = dma_fence_begin_signalling();
-   ret = intel_engine_cmd_parser(pw->engine,
- pw->batch,
- pw->batch_offset,
- pw->batch_length,
- pw->shadow,
- pw->jump_whitelist,
- pw->shadow_map,
- pw->batch_map);
-   dma_fence_end_signalling(cookie);
-
-   return ret;
-}
-
-static void __eb_parse_release(struct dma_fence_work *work)
-{
-   struct eb_parse_work *pw = container_of(work, typeof(*pw), base);
-
-   if (!IS_ERR_OR_NULL(pw->jump_whitelist))
-   kfree(pw->jump_whitelist);
-
-   if (pw->batch_map)
-   i915_gem_object_unpin_map(pw->batch->obj);
-   else
-   i915_gem_object_unpin_pages(pw->batch->obj);
-
-   i915_gem_object_unpin_map(pw->shadow->obj);
-
-   if (pw->trampoline)
-   i915_active_release(&pw->trampoline->active);
-   i915_active_release(&pw->shadow->active);
-   i915_active_release(&pw->batch->active);
-}
-
-static const struct dma_fence_work_ops eb_parse_ops = {
-   .name = "eb_parse",
-   .work = __eb_parse,
-   .release = __eb_parse_release,
-};
-
-static inline int
-__parser_mark_active(struct i915_vma *vma,
-struct intel_timeline *tl,
-struct dma_fence *fence)
-{
-   struct 

[Intel-gfx] [PATCH 1/5] drm/i915: Revert "drm/i915/gem: Asynchronous cmdparser"

2021-07-10 Thread Jason Ekstrand
This reverts 686c7c35abc2 ("drm/i915/gem: Asynchronous cmdparser").  The
justification for this commit in the git history was a vague comment
about getting it out from under the struct_mutex.  While this may
improve perf for some workloads on Gen7 platforms where we rely on the
command parser for features such as indirect rendering, no numbers were
provided to prove such an improvement.  It claims to have closed two
gitlab/bugzilla issues but with no explanation whatsoever as to why or
what bug it's fixing.

Meanwhile, by moving command parsing off to an async callback, it leaves
us with a problem of what to do on error.  When things were synchronous,
EXECBUFFER2 would fail with an error code if parsing failed.  When
moving it to async, we needed another way to handle that error and the
solution employed was to set an error on the dma_fence and then trust
that said error gets propagated to the client eventually.  Moving back
to synchronous will help us untangle the fence error propagation mess.

This also reverts most of 0edbb9ba1bfe ("drm/i915: Move cmd parser
pinning to execbuffer") which is a refactor of some of our allocation
paths for asynchronous parsing.  Now that everything is synchronous, we
don't need it.

v2 (Daniel Vetter):
 - Add stable Cc and Fixes tag

Signed-off-by: Jason Ekstrand 
Cc: <stable@vger.kernel.org> # v5.6+
Fixes: 9e31c1fe45d5 ("drm/i915: Propagate errors on awaiting already signaled fences")
Cc: Maarten Lankhorst 
Reviewed-by: Jon Bloomfield 
Acked-by: Daniel Vetter 
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 227 +-
 .../i915/gem/selftests/i915_gem_execbuffer.c  |   4 +
 drivers/gpu/drm/i915/i915_cmd_parser.c| 132 +-
 drivers/gpu/drm/i915/i915_drv.h   |   7 +-
 4 files changed, 91 insertions(+), 279 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 5ea8b4e23e428..1ed7475de454d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -25,10 +25,8 @@
 #include "i915_gem_clflush.h"
 #include "i915_gem_context.h"
 #include "i915_gem_ioctls.h"
-#include "i915_sw_fence_work.h"
 #include "i915_trace.h"
 #include "i915_user_extensions.h"
-#include "i915_memcpy.h"
 
 struct eb_vma {
struct i915_vma *vma;
@@ -1471,6 +1469,10 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
int err;
struct intel_engine_cs *engine = eb->engine;
 
+   /* If we need to copy for the cmdparser, we will stall anyway */
+   if (eb_use_cmdparser(eb))
+   return ERR_PTR(-EWOULDBLOCK);
+
if (!reloc_can_use_engine(engine)) {
engine = engine->gt->engine_class[COPY_ENGINE_CLASS][0];
if (!engine)
@@ -2385,217 +2387,6 @@ shadow_batch_pin(struct i915_execbuffer *eb,
return vma;
 }
 
-struct eb_parse_work {
-   struct dma_fence_work base;
-   struct intel_engine_cs *engine;
-   struct i915_vma *batch;
-   struct i915_vma *shadow;
-   struct i915_vma *trampoline;
-   unsigned long batch_offset;
-   unsigned long batch_length;
-   unsigned long *jump_whitelist;
-   const void *batch_map;
-   void *shadow_map;
-};
-
-static int __eb_parse(struct dma_fence_work *work)
-{
-   struct eb_parse_work *pw = container_of(work, typeof(*pw), base);
-   int ret;
-   bool cookie;
-
-   cookie = dma_fence_begin_signalling();
-   ret = intel_engine_cmd_parser(pw->engine,
- pw->batch,
- pw->batch_offset,
- pw->batch_length,
- pw->shadow,
- pw->jump_whitelist,
- pw->shadow_map,
- pw->batch_map);
-   dma_fence_end_signalling(cookie);
-
-   return ret;
-}
-
-static void __eb_parse_release(struct dma_fence_work *work)
-{
-   struct eb_parse_work *pw = container_of(work, typeof(*pw), base);
-
-   if (!IS_ERR_OR_NULL(pw->jump_whitelist))
-   kfree(pw->jump_whitelist);
-
-   if (pw->batch_map)
-   i915_gem_object_unpin_map(pw->batch->obj);
-   else
-   i915_gem_object_unpin_pages(pw->batch->obj);
-
-   i915_gem_object_unpin_map(pw->shadow->obj);
-
-   if (pw->trampoline)
-   i915_active_release(&pw->trampoline->active);
-   i915_active_release(&pw->shadow->active);
-   i915_active_release(&pw->batch->active);
-}
-
-static const struct dma_fence_work_ops eb_parse_ops = {
-   .name = "eb_parse",
-   .work = __eb_parse,
-   .release = __eb_parse_release,
-};
-
-static inline int
-__parser_mark_active(struct i915_vma *vma,
-struct intel_timeline *tl,
-struct dma_fence *fence)
-{
-   struct 

[Intel-gfx] [PATCH 1/5] drm/i915: Revert "drm/i915/gem: Asynchronous cmdparser"

2021-06-03 Thread Jason Ekstrand
This reverts 686c7c35abc2 ("drm/i915/gem: Asynchronous cmdparser").  The
justification for this commit in the git history was a vague comment
about getting it out from under the struct_mutex.  While this may
improve perf for some workloads on Gen7 platforms where we rely on the
command parser for features such as indirect rendering, no numbers were
provided to prove such an improvement.  It claims to have closed two
gitlab/bugzilla issues but with no explanation whatsoever as to why or
what bug it's fixing.

Meanwhile, by moving command parsing off to an async callback, it leaves
us with a problem of what to do on error.  When things were synchronous,
EXECBUFFER2 would fail with an error code if parsing failed.  When
moving it to async, we needed another way to handle that error and the
solution employed was to set an error on the dma_fence and then trust
that said error gets propagated to the client eventually.  Moving back
to synchronous will help us untangle the fence error propagation mess.

This also reverts most of 0edbb9ba1bfe ("drm/i915: Move cmd parser
pinning to execbuffer") which is a refactor of some of our allocation
paths for asynchronous parsing.  Now that everything is synchronous, we
don't need it.

v2 (Daniel Vetter):
 - Add stable Cc and Fixes tag

Signed-off-by: Jason Ekstrand 
Cc: <stable@vger.kernel.org> # v5.6+
Fixes: 9e31c1fe45d5 ("drm/i915: Propagate errors on awaiting already signaled fences")
Cc: Maarten Lankhorst 
Reviewed-by: Jon Bloomfield 
Acked-by: Daniel Vetter 
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 227 +-
 .../i915/gem/selftests/i915_gem_execbuffer.c  |   4 +
 drivers/gpu/drm/i915/i915_cmd_parser.c| 132 +-
 drivers/gpu/drm/i915/i915_drv.h   |   7 +-
 4 files changed, 91 insertions(+), 279 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 297143511f99b..a49da4b24d4d4 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -25,10 +25,8 @@
 #include "i915_gem_clflush.h"
 #include "i915_gem_context.h"
 #include "i915_gem_ioctls.h"
-#include "i915_sw_fence_work.h"
 #include "i915_trace.h"
 #include "i915_user_extensions.h"
-#include "i915_memcpy.h"
 
 struct eb_vma {
struct i915_vma *vma;
@@ -1456,6 +1454,10 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
int err;
struct intel_engine_cs *engine = eb->engine;
 
+   /* If we need to copy for the cmdparser, we will stall anyway */
+   if (eb_use_cmdparser(eb))
+   return ERR_PTR(-EWOULDBLOCK);
+
if (!reloc_can_use_engine(engine)) {
engine = engine->gt->engine_class[COPY_ENGINE_CLASS][0];
if (!engine)
@@ -2372,217 +2374,6 @@ shadow_batch_pin(struct i915_execbuffer *eb,
return vma;
 }
 
-struct eb_parse_work {
-   struct dma_fence_work base;
-   struct intel_engine_cs *engine;
-   struct i915_vma *batch;
-   struct i915_vma *shadow;
-   struct i915_vma *trampoline;
-   unsigned long batch_offset;
-   unsigned long batch_length;
-   unsigned long *jump_whitelist;
-   const void *batch_map;
-   void *shadow_map;
-};
-
-static int __eb_parse(struct dma_fence_work *work)
-{
-   struct eb_parse_work *pw = container_of(work, typeof(*pw), base);
-   int ret;
-   bool cookie;
-
-   cookie = dma_fence_begin_signalling();
-   ret = intel_engine_cmd_parser(pw->engine,
- pw->batch,
- pw->batch_offset,
- pw->batch_length,
- pw->shadow,
- pw->jump_whitelist,
- pw->shadow_map,
- pw->batch_map);
-   dma_fence_end_signalling(cookie);
-
-   return ret;
-}
-
-static void __eb_parse_release(struct dma_fence_work *work)
-{
-   struct eb_parse_work *pw = container_of(work, typeof(*pw), base);
-
-   if (!IS_ERR_OR_NULL(pw->jump_whitelist))
-   kfree(pw->jump_whitelist);
-
-   if (pw->batch_map)
-   i915_gem_object_unpin_map(pw->batch->obj);
-   else
-   i915_gem_object_unpin_pages(pw->batch->obj);
-
-   i915_gem_object_unpin_map(pw->shadow->obj);
-
-   if (pw->trampoline)
-   i915_active_release(&pw->trampoline->active);
-   i915_active_release(&pw->shadow->active);
-   i915_active_release(&pw->batch->active);
-}
-
-static const struct dma_fence_work_ops eb_parse_ops = {
-   .name = "eb_parse",
-   .work = __eb_parse,
-   .release = __eb_parse_release,
-};
-
-static inline int
-__parser_mark_active(struct i915_vma *vma,
-struct intel_timeline *tl,
-struct dma_fence *fence)
-{
-   struct 

Re: [Intel-gfx] [PATCH 1/5] drm/i915: Revert "drm/i915/gem: Asynchronous cmdparser"

2021-06-03 Thread Jason Ekstrand
On Thu, Jun 3, 2021 at 3:22 AM Daniel Vetter  wrote:
>
> On Wed, Jun 02, 2021 at 11:41:45AM -0500, Jason Ekstrand wrote:
> > This reverts 686c7c35abc2 ("drm/i915/gem: Asynchronous cmdparser").  The
> > justification for this commit in the git history was a vague comment
> > about getting it out from under the struct_mutex.  While this may
> > improve perf for some workloads on Gen7 platforms where we rely on the
> > command parser for features such as indirect rendering, no numbers were
> > provided to prove such an improvement.  It claims to closed two
> > gitlab/bugzilla issues but with no explanation whatsoever as to why or
> > what bug it's fixing.
> >
> > Meanwhile, by moving command parsing off to an async callback, it leaves
> > us with a problem of what to do on error.  When things were synchronous,
> > EXECBUFFER2 would fail with an error code if parsing failed.  When
> > moving it to async, we needed another way to handle that error and the
> > solution employed was to set an error on the dma_fence and then trust
> > that said error gets propagated to the client eventually.  Moving back
> > to synchronous will help us untangle the fence error propagation mess.
> >
> > This also reverts most of 0edbb9ba1bfe ("drm/i915: Move cmd parser
> > pinning to execbuffer") which is a refactor of some of our allocation
> > paths for asynchronous parsing.  Now that everything is synchronous, we
> > don't need it.
> >
> > Signed-off-by: Jason Ekstrand 
> > Cc: Maarten Lankhorst 
> > Reviewed-by: Jon Bloomfield 
>
> This needs the same Cc: stable and Fixes: lines as the dma_fence error
> propagation revert. Otherwise the cmd parser breaks, which isn't great.

Done.  I may have to create multiple versions of this patch for Greg
but I can do that.
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 1/5] drm/i915: Revert "drm/i915/gem: Asynchronous cmdparser"

2021-06-03 Thread Daniel Vetter
On Wed, Jun 02, 2021 at 11:41:45AM -0500, Jason Ekstrand wrote:
> This reverts 686c7c35abc2 ("drm/i915/gem: Asynchronous cmdparser").  The
> justification for this commit in the git history was a vague comment
> about getting it out from under the struct_mutex.  While this may
> improve perf for some workloads on Gen7 platforms where we rely on the
> command parser for features such as indirect rendering, no numbers were
> provided to prove such an improvement.  It claims to closed two
> gitlab/bugzilla issues but with no explanation whatsoever as to why or
> what bug it's fixing.
> 
> Meanwhile, by moving command parsing off to an async callback, it leaves
> us with a problem of what to do on error.  When things were synchronous,
> EXECBUFFER2 would fail with an error code if parsing failed.  When
> moving it to async, we needed another way to handle that error and the
> solution employed was to set an error on the dma_fence and then trust
> that said error gets propagated to the client eventually.  Moving back
> to synchronous will help us untangle the fence error propagation mess.
> 
> This also reverts most of 0edbb9ba1bfe ("drm/i915: Move cmd parser
> pinning to execbuffer") which is a refactor of some of our allocation
> paths for asynchronous parsing.  Now that everything is synchronous, we
> don't need it.
> 
> Signed-off-by: Jason Ekstrand 
> Cc: Maarten Lankhorst 
> Reviewed-by: Jon Bloomfield 

This needs the same Cc: stable and Fixes: lines as the dma_fence error
propagation revert. Otherwise the cmd parser breaks, which isn't great.

> ---
>  .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 227 +-
>  .../i915/gem/selftests/i915_gem_execbuffer.c  |   4 +
>  drivers/gpu/drm/i915/i915_cmd_parser.c| 132 +-
>  drivers/gpu/drm/i915/i915_drv.h   |   7 +-
>  4 files changed, 91 insertions(+), 279 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
> b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index 297143511f99b..a49da4b24d4d4 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -25,10 +25,8 @@
>  #include "i915_gem_clflush.h"
>  #include "i915_gem_context.h"
>  #include "i915_gem_ioctls.h"
> -#include "i915_sw_fence_work.h"
>  #include "i915_trace.h"
>  #include "i915_user_extensions.h"
> -#include "i915_memcpy.h"
>  
>  struct eb_vma {
>   struct i915_vma *vma;
> @@ -1456,6 +1454,10 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
>   int err;
>   struct intel_engine_cs *engine = eb->engine;
>  
> + /* If we need to copy for the cmdparser, we will stall anyway */
> + if (eb_use_cmdparser(eb))
> + return ERR_PTR(-EWOULDBLOCK);
> +
>   if (!reloc_can_use_engine(engine)) {
>   engine = engine->gt->engine_class[COPY_ENGINE_CLASS][0];
>   if (!engine)
> @@ -2372,217 +2374,6 @@ shadow_batch_pin(struct i915_execbuffer *eb,
>   return vma;
>  }
>  
> -struct eb_parse_work {
> - struct dma_fence_work base;
> - struct intel_engine_cs *engine;
> - struct i915_vma *batch;
> - struct i915_vma *shadow;
> - struct i915_vma *trampoline;
> - unsigned long batch_offset;
> - unsigned long batch_length;
> - unsigned long *jump_whitelist;
> - const void *batch_map;
> - void *shadow_map;
> -};
> -
> -static int __eb_parse(struct dma_fence_work *work)
> -{
> - struct eb_parse_work *pw = container_of(work, typeof(*pw), base);
> - int ret;
> - bool cookie;
> -
> - cookie = dma_fence_begin_signalling();
> - ret = intel_engine_cmd_parser(pw->engine,
> -   pw->batch,
> -   pw->batch_offset,
> -   pw->batch_length,
> -   pw->shadow,
> -   pw->jump_whitelist,
> -   pw->shadow_map,
> -   pw->batch_map);
> - dma_fence_end_signalling(cookie);
> -
> - return ret;
> -}
> -
> -static void __eb_parse_release(struct dma_fence_work *work)
> -{
> - struct eb_parse_work *pw = container_of(work, typeof(*pw), base);
> -
> - if (!IS_ERR_OR_NULL(pw->jump_whitelist))
> - kfree(pw->jump_whitelist);
> -
> - if (pw->batch_map)
> - i915_gem_object_unpin_map(pw->batch->obj);
> - else
> - i915_gem_object_unpin_pages(pw->batch->obj);
> -
> - i915_gem_object_unpin_map(pw->shadow->obj);
> -
> - if (pw->trampoline)
> - i915_active_release(&pw->trampoline->active);
> - i915_active_release(&pw->shadow->active);
> - i915_active_release(&pw->batch->active);
> -}
> -
> -static const struct dma_fence_work_ops eb_parse_ops = {
> - .name = "eb_parse",
> - .work = __eb_parse,
> - .release = __eb_parse_release,
> -};
> -
> 

[Intel-gfx] [PATCH 1/5] drm/i915: Revert "drm/i915/gem: Asynchronous cmdparser"

2021-06-02 Thread Jason Ekstrand
This reverts 686c7c35abc2 ("drm/i915/gem: Asynchronous cmdparser").  The
justification for this commit in the git history was a vague comment
about getting it out from under the struct_mutex.  While this may
improve perf for some workloads on Gen7 platforms where we rely on the
command parser for features such as indirect rendering, no numbers were
provided to prove such an improvement.  It claims to have closed two
gitlab/bugzilla issues but with no explanation whatsoever as to why or
what bug it's fixing.

Meanwhile, by moving command parsing off to an async callback, it leaves
us with a problem of what to do on error.  When things were synchronous,
EXECBUFFER2 would fail with an error code if parsing failed.  When
moving it to async, we needed another way to handle that error and the
solution employed was to set an error on the dma_fence and then trust
that said error gets propagated to the client eventually.  Moving back
to synchronous will help us untangle the fence error propagation mess.

This also reverts most of 0edbb9ba1bfe ("drm/i915: Move cmd parser
pinning to execbuffer") which is a refactor of some of our allocation
paths for asynchronous parsing.  Now that everything is synchronous, we
don't need it.

Signed-off-by: Jason Ekstrand 
Cc: Maarten Lankhorst 
Reviewed-by: Jon Bloomfield 
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 227 +-
 .../i915/gem/selftests/i915_gem_execbuffer.c  |   4 +
 drivers/gpu/drm/i915/i915_cmd_parser.c| 132 +-
 drivers/gpu/drm/i915/i915_drv.h   |   7 +-
 4 files changed, 91 insertions(+), 279 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 297143511f99b..a49da4b24d4d4 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -25,10 +25,8 @@
 #include "i915_gem_clflush.h"
 #include "i915_gem_context.h"
 #include "i915_gem_ioctls.h"
-#include "i915_sw_fence_work.h"
 #include "i915_trace.h"
 #include "i915_user_extensions.h"
-#include "i915_memcpy.h"
 
 struct eb_vma {
struct i915_vma *vma;
@@ -1456,6 +1454,10 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
int err;
struct intel_engine_cs *engine = eb->engine;
 
+   /* If we need to copy for the cmdparser, we will stall anyway */
+   if (eb_use_cmdparser(eb))
+   return ERR_PTR(-EWOULDBLOCK);
+
if (!reloc_can_use_engine(engine)) {
engine = engine->gt->engine_class[COPY_ENGINE_CLASS][0];
if (!engine)
@@ -2372,217 +2374,6 @@ shadow_batch_pin(struct i915_execbuffer *eb,
return vma;
 }
 
-struct eb_parse_work {
-   struct dma_fence_work base;
-   struct intel_engine_cs *engine;
-   struct i915_vma *batch;
-   struct i915_vma *shadow;
-   struct i915_vma *trampoline;
-   unsigned long batch_offset;
-   unsigned long batch_length;
-   unsigned long *jump_whitelist;
-   const void *batch_map;
-   void *shadow_map;
-};
-
-static int __eb_parse(struct dma_fence_work *work)
-{
-   struct eb_parse_work *pw = container_of(work, typeof(*pw), base);
-   int ret;
-   bool cookie;
-
-   cookie = dma_fence_begin_signalling();
-   ret = intel_engine_cmd_parser(pw->engine,
- pw->batch,
- pw->batch_offset,
- pw->batch_length,
- pw->shadow,
- pw->jump_whitelist,
- pw->shadow_map,
- pw->batch_map);
-   dma_fence_end_signalling(cookie);
-
-   return ret;
-}
-
-static void __eb_parse_release(struct dma_fence_work *work)
-{
-   struct eb_parse_work *pw = container_of(work, typeof(*pw), base);
-
-   if (!IS_ERR_OR_NULL(pw->jump_whitelist))
-   kfree(pw->jump_whitelist);
-
-   if (pw->batch_map)
-   i915_gem_object_unpin_map(pw->batch->obj);
-   else
-   i915_gem_object_unpin_pages(pw->batch->obj);
-
-   i915_gem_object_unpin_map(pw->shadow->obj);
-
-   if (pw->trampoline)
-   i915_active_release(&pw->trampoline->active);
-   i915_active_release(&pw->shadow->active);
-   i915_active_release(&pw->batch->active);
-}
-
-static const struct dma_fence_work_ops eb_parse_ops = {
-   .name = "eb_parse",
-   .work = __eb_parse,
-   .release = __eb_parse_release,
-};
-
-static inline int
-__parser_mark_active(struct i915_vma *vma,
-struct intel_timeline *tl,
-struct dma_fence *fence)
-{
-   struct intel_gt_buffer_pool_node *node = vma->private;
-
-   return i915_active_ref(&node->active, tl->fence_context, fence);
-}
-
-static int
-parser_mark_active(struct eb_parse_work *pw,