Re: [Intel-gfx] [PATCH] drm/i915/tgl: Extend MI_SEMAPHORE_WAIT

2019-09-17 Thread Mika Kuoppala
Chris Wilson  writes:

> On Tigerlake, MI_SEMAPHORE_WAIT grew an extra dword, so be sure to
> update the length field and emit that extra parameter and any padding
> noop as required.
>
> v2: Define the token shift while we are adding the updated MI_SEMAPHORE_WAIT
> v3: Use int instead of bool in the addition so that readers are not left
> wondering about the intricacies of the C spec. Now they just have to
> worry what the integer value of a boolean operation is...
>
> Signed-off-by: Chris Wilson 
> Cc: Mika Kuoppala 
> Cc: Daniele Ceraolo Spurio 
> Cc: Michal Winiarski 

Reviewed-by: Mika Kuoppala 

> ---
>  drivers/gpu/drm/i915/gt/intel_gpu_commands.h |  3 +
>  drivers/gpu/drm/i915/gt/intel_lrc.c  | 71 ++--
>  drivers/gpu/drm/i915/i915_pci.c  |  1 -
>  drivers/gpu/drm/i915/i915_request.c  | 21 --
>  4 files changed, 84 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h 
> b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> index fbad403ab7ac..f78b13d74e17 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> @@ -112,6 +112,7 @@
>  #define MI_SEMAPHORE_SIGNAL  MI_INSTR(0x1b, 0) /* GEN8+ */
>  #define   MI_SEMAPHORE_TARGET(engine)((engine)<<15)
>  #define MI_SEMAPHORE_WAITMI_INSTR(0x1c, 2) /* GEN8+ */
> +#define MI_SEMAPHORE_WAIT_TOKEN  MI_INSTR(0x1c, 3) /* GEN12+ */
>  #define   MI_SEMAPHORE_POLL  (1 << 15)
>  #define   MI_SEMAPHORE_SAD_GT_SDD(0 << 12)
>  #define   MI_SEMAPHORE_SAD_GTE_SDD   (1 << 12)
> @@ -119,6 +120,8 @@
>  #define   MI_SEMAPHORE_SAD_LTE_SDD   (3 << 12)
>  #define   MI_SEMAPHORE_SAD_EQ_SDD(4 << 12)
>  #define   MI_SEMAPHORE_SAD_NEQ_SDD   (5 << 12)
> +#define   MI_SEMAPHORE_TOKEN_MASKREG_GENMASK(9, 5)
> +#define   MI_SEMAPHORE_TOKEN_SHIFT   5
>  #define MI_STORE_DWORD_IMM   MI_INSTR(0x20, 1)
>  #define MI_STORE_DWORD_IMM_GEN4  MI_INSTR(0x20, 2)
>  #define   MI_MEM_VIRTUAL (1 << 22) /* 945,g33,965 */
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
> b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index a3f0e4999744..a99166a2d2eb 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -2879,6 +2879,22 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct 
> i915_request *request, u32 *cs)
>   return gen8_emit_fini_breadcrumb_footer(request, cs);
>  }
>  
> +static u32 *
> +gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
> +{
> + cs = gen8_emit_ggtt_write_rcs(cs,
> +   request->fence.seqno,
> +   request->timeline->hwsp_offset,
> +   PIPE_CONTROL_CS_STALL |
> +   PIPE_CONTROL_TILE_CACHE_FLUSH |
> +   PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
> +   PIPE_CONTROL_DEPTH_CACHE_FLUSH |
> +   PIPE_CONTROL_DC_FLUSH_ENABLE |
> +   PIPE_CONTROL_FLUSH_ENABLE);
> +
> + return gen8_emit_fini_breadcrumb_footer(request, cs);
> +}
> +
>  /*
>   * Note that the CS instruction pre-parser will not stall on the breadcrumb
>   * flush and will continue pre-fetching the instructions after it before the
> @@ -2897,8 +2913,49 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct 
> i915_request *request, u32 *cs)
>   * All the above applies only to the instructions themselves. Non-inline data
>   * used by the instructions is not pre-fetched.
>   */
> -static u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *request,
> -u32 *cs)
> +
> +static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 
> *cs)
> +{
> + *cs++ = MI_SEMAPHORE_WAIT_TOKEN |
> + MI_SEMAPHORE_GLOBAL_GTT |
> + MI_SEMAPHORE_POLL |
> + MI_SEMAPHORE_SAD_EQ_SDD;
> + *cs++ = 0;
> + *cs++ = intel_hws_preempt_address(request->engine);
> + *cs++ = 0;
> + *cs++ = 0;
> + *cs++ = MI_NOOP;
> +
> + return cs;
> +}
> +
> +static __always_inline u32*
> +gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs)
> +{
> + *cs++ = MI_USER_INTERRUPT;
> +
> + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
> + if (intel_engine_has_semaphores(request->engine))
> + cs = gen12_emit_preempt_busywait(request, cs);
> +
> + request->tail = intel_ring_offset(request, cs);
> + assert_ring_tail_valid(request->ring, request->tail);
> +
> + return gen8_emit_wa_tail(request, cs);
> +}
> +
> +static u32 *gen12_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
> +{
> + cs = gen8_emit_ggtt_write(cs,
> +   request->fence.seqno,
> +   request->timeline->hwsp_offset,
> +   0);
> +
> + return 

[Intel-gfx] [PATCH] drm/i915/tgl: Extend MI_SEMAPHORE_WAIT

2019-09-17 Thread Chris Wilson
On Tigerlake, MI_SEMAPHORE_WAIT grew an extra dword, so be sure to
update the length field and emit that extra parameter and any padding
noop as required.

v2: Define the token shift while we are adding the updated MI_SEMAPHORE_WAIT
v3: Use int instead of bool in the addition so that readers are not left
wondering about the intricacies of the C spec. Now they just have to
worry what the integer value of a boolean operation is...

Signed-off-by: Chris Wilson 
Cc: Mika Kuoppala 
Cc: Daniele Ceraolo Spurio 
Cc: Michal Winiarski 
---
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h |  3 +
 drivers/gpu/drm/i915/gt/intel_lrc.c  | 71 ++--
 drivers/gpu/drm/i915/i915_pci.c  |  1 -
 drivers/gpu/drm/i915/i915_request.c  | 21 --
 4 files changed, 84 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h 
b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index fbad403ab7ac..f78b13d74e17 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -112,6 +112,7 @@
 #define MI_SEMAPHORE_SIGNALMI_INSTR(0x1b, 0) /* GEN8+ */
 #define   MI_SEMAPHORE_TARGET(engine)  ((engine)<<15)
 #define MI_SEMAPHORE_WAIT  MI_INSTR(0x1c, 2) /* GEN8+ */
+#define MI_SEMAPHORE_WAIT_TOKENMI_INSTR(0x1c, 3) /* GEN12+ */
 #define   MI_SEMAPHORE_POLL(1 << 15)
 #define   MI_SEMAPHORE_SAD_GT_SDD  (0 << 12)
 #define   MI_SEMAPHORE_SAD_GTE_SDD (1 << 12)
@@ -119,6 +120,8 @@
 #define   MI_SEMAPHORE_SAD_LTE_SDD (3 << 12)
 #define   MI_SEMAPHORE_SAD_EQ_SDD  (4 << 12)
 #define   MI_SEMAPHORE_SAD_NEQ_SDD (5 << 12)
+#define   MI_SEMAPHORE_TOKEN_MASK  REG_GENMASK(9, 5)
+#define   MI_SEMAPHORE_TOKEN_SHIFT 5
 #define MI_STORE_DWORD_IMM MI_INSTR(0x20, 1)
 #define MI_STORE_DWORD_IMM_GEN4MI_INSTR(0x20, 2)
 #define   MI_MEM_VIRTUAL   (1 << 22) /* 945,g33,965 */
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
b/drivers/gpu/drm/i915/gt/intel_lrc.c
index a3f0e4999744..a99166a2d2eb 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2879,6 +2879,22 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct 
i915_request *request, u32 *cs)
return gen8_emit_fini_breadcrumb_footer(request, cs);
 }
 
+static u32 *
+gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
+{
+   cs = gen8_emit_ggtt_write_rcs(cs,
+ request->fence.seqno,
+ request->timeline->hwsp_offset,
+ PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_TILE_CACHE_FLUSH |
+ PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ PIPE_CONTROL_DC_FLUSH_ENABLE |
+ PIPE_CONTROL_FLUSH_ENABLE);
+
+   return gen8_emit_fini_breadcrumb_footer(request, cs);
+}
+
 /*
  * Note that the CS instruction pre-parser will not stall on the breadcrumb
  * flush and will continue pre-fetching the instructions after it before the
@@ -2897,8 +2913,49 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct 
i915_request *request, u32 *cs)
  * All the above applies only to the instructions themselves. Non-inline data
  * used by the instructions is not pre-fetched.
  */
-static u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *request,
-  u32 *cs)
+
+static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs)
+{
+   *cs++ = MI_SEMAPHORE_WAIT_TOKEN |
+   MI_SEMAPHORE_GLOBAL_GTT |
+   MI_SEMAPHORE_POLL |
+   MI_SEMAPHORE_SAD_EQ_SDD;
+   *cs++ = 0;
+   *cs++ = intel_hws_preempt_address(request->engine);
+   *cs++ = 0;
+   *cs++ = 0;
+   *cs++ = MI_NOOP;
+
+   return cs;
+}
+
+static __always_inline u32*
+gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs)
+{
+   *cs++ = MI_USER_INTERRUPT;
+
+   *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+   if (intel_engine_has_semaphores(request->engine))
+   cs = gen12_emit_preempt_busywait(request, cs);
+
+   request->tail = intel_ring_offset(request, cs);
+   assert_ring_tail_valid(request->ring, request->tail);
+
+   return gen8_emit_wa_tail(request, cs);
+}
+
+static u32 *gen12_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
+{
+   cs = gen8_emit_ggtt_write(cs,
+ request->fence.seqno,
+ request->timeline->hwsp_offset,
+ 0);
+
+   return gen12_emit_fini_breadcrumb_footer(request, cs);
+}
+
+static u32 *
+gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
 {
cs = gen8_emit_ggtt_write_rcs(cs,

Re: [Intel-gfx] [PATCH] drm/i915/tgl: Extend MI_SEMAPHORE_WAIT

2019-09-17 Thread Chris Wilson
Quoting Mika Kuoppala (2019-09-17 11:56:40)
> Chris Wilson  writes:
> 
> > On Tigerlake, MI_SEMAPHORE_WAIT grew an extra dword, so be sure to
> > update the length field and emit that extra parameter and any padding
> > noop as required.
> >
> > v2: Define the token shift while we are adding the updated MI_SEMAPHORE_WAIT
> >
> > Signed-off-by: Chris Wilson 
> > Cc: Mika Kuoppala 
> > Cc: Daniele Ceraolo Spurio 
> > Cc: Michal Winiarski 
> > ---
> >  drivers/gpu/drm/i915/gt/intel_gpu_commands.h |  2 +
> >  drivers/gpu/drm/i915/gt/intel_lrc.c  | 71 ++--
> >  drivers/gpu/drm/i915/i915_pci.c  |  1 -
> >  drivers/gpu/drm/i915/i915_request.c  | 21 --
> >  4 files changed, 83 insertions(+), 12 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h 
> > b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> > index fbad403ab7ac..da2025bc332c 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> > @@ -112,6 +112,7 @@
> >  #define MI_SEMAPHORE_SIGNAL  MI_INSTR(0x1b, 0) /* GEN8+ */
> >  #define   MI_SEMAPHORE_TARGET(engine)((engine)<<15)
> >  #define MI_SEMAPHORE_WAITMI_INSTR(0x1c, 2) /* GEN8+ */
> > +#define MI_SEMAPHORE_WAIT_TOKEN  MI_INSTR(0x1c, 3) /* GEN12+ */
> >  #define   MI_SEMAPHORE_POLL  (1 << 15)
> >  #define   MI_SEMAPHORE_SAD_GT_SDD(0 << 12)
> >  #define   MI_SEMAPHORE_SAD_GTE_SDD   (1 << 12)
> > @@ -119,6 +120,7 @@
> >  #define   MI_SEMAPHORE_SAD_LTE_SDD   (3 << 12)
> >  #define   MI_SEMAPHORE_SAD_EQ_SDD(4 << 12)
> >  #define   MI_SEMAPHORE_SAD_NEQ_SDD   (5 << 12)
> > +#define   MI_SEMAPHORE_TOKEN_SHIFT   5
> 
> Do we need a mask too?

Michal didn't ask for a mask, just that they were using the shift.

> >  #define MI_STORE_DWORD_IMM   MI_INSTR(0x20, 1)
> >  #define MI_STORE_DWORD_IMM_GEN4  MI_INSTR(0x20, 2)
> >  #define   MI_MEM_VIRTUAL (1 << 22) /* 945,g33,965 */
> > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
> > b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > index 64fa2db5905f..c74fc75e4980 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > @@ -3237,6 +3237,22 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct 
> > i915_request *request, u32 *cs)
> >   return gen8_emit_fini_breadcrumb_footer(request, cs);
> >  }
> >  
> > +static u32 *
> > +gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
> > +{
> > + cs = gen8_emit_ggtt_write_rcs(cs,
> > +   request->fence.seqno,
> > +   request->timeline->hwsp_offset,
> > +   PIPE_CONTROL_CS_STALL |
> > +   PIPE_CONTROL_TILE_CACHE_FLUSH |
> > +   PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
> > +   PIPE_CONTROL_DEPTH_CACHE_FLUSH |
> > +   PIPE_CONTROL_DC_FLUSH_ENABLE |
> > +   PIPE_CONTROL_FLUSH_ENABLE);
> > +
> > + return gen8_emit_fini_breadcrumb_footer(request, cs);
> > +}
> > +
> >  /*
> >   * Note that the CS instruction pre-parser will not stall on the breadcrumb
> >   * flush and will continue pre-fetching the instructions after it before 
> > the
> > @@ -3255,8 +3271,49 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct 
> > i915_request *request, u32 *cs)
> >   * All the above applies only to the instructions themselves. Non-inline 
> > data
> >   * used by the instructions is not pre-fetched.
> >   */
> > -static u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *request,
> > -u32 *cs)
> > +
> > +static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 
> > *cs)
> > +{
> > + *cs++ = MI_SEMAPHORE_WAIT_TOKEN |
> > + MI_SEMAPHORE_GLOBAL_GTT |
> > + MI_SEMAPHORE_POLL |
> > + MI_SEMAPHORE_SAD_EQ_SDD;
> > + *cs++ = 0;
> > + *cs++ = intel_hws_preempt_address(request->engine);
> 
> This is supposed to be in canonical form.
> 
> > + *cs++ = 0;

All upper bits are 0. Canonical form is correct :)

> > + *cs++ = 0;
> > + *cs++ = MI_NOOP;
> > +
> > + return cs;
> > +}

> > +static u32 *
> > +gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
> >  {
> >   cs = gen8_emit_ggtt_write_rcs(cs,
> > request->fence.seqno,
> > @@ -3268,7 +3325,7 @@ static u32 *gen11_emit_fini_breadcrumb_rcs(struct 
> > i915_request *request,
> > PIPE_CONTROL_DC_FLUSH_ENABLE |
> > PIPE_CONTROL_FLUSH_ENABLE);
> >  
> > - return gen8_emit_fini_breadcrumb_footer(request, cs);
> > + return gen12_emit_fini_breadcrumb_footer(request, cs);
> >  }
> >  
> >  static void execlists_park(struct intel_engine_cs *engine)
> > @@ -3297,9 +3354,6 

Re: [Intel-gfx] [PATCH] drm/i915/tgl: Extend MI_SEMAPHORE_WAIT

2019-09-17 Thread Mika Kuoppala
Chris Wilson  writes:

> On Tigerlake, MI_SEMAPHORE_WAIT grew an extra dword, so be sure to
> update the length field and emit that extra parameter and any padding
> noop as required.
>
> v2: Define the token shift while we are adding the updated MI_SEMAPHORE_WAIT
>
> Signed-off-by: Chris Wilson 
> Cc: Mika Kuoppala 
> Cc: Daniele Ceraolo Spurio 
> Cc: Michal Winiarski 
> ---
>  drivers/gpu/drm/i915/gt/intel_gpu_commands.h |  2 +
>  drivers/gpu/drm/i915/gt/intel_lrc.c  | 71 ++--
>  drivers/gpu/drm/i915/i915_pci.c  |  1 -
>  drivers/gpu/drm/i915/i915_request.c  | 21 --
>  4 files changed, 83 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h 
> b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> index fbad403ab7ac..da2025bc332c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> @@ -112,6 +112,7 @@
>  #define MI_SEMAPHORE_SIGNAL  MI_INSTR(0x1b, 0) /* GEN8+ */
>  #define   MI_SEMAPHORE_TARGET(engine)((engine)<<15)
>  #define MI_SEMAPHORE_WAITMI_INSTR(0x1c, 2) /* GEN8+ */
> +#define MI_SEMAPHORE_WAIT_TOKEN  MI_INSTR(0x1c, 3) /* GEN12+ */
>  #define   MI_SEMAPHORE_POLL  (1 << 15)
>  #define   MI_SEMAPHORE_SAD_GT_SDD(0 << 12)
>  #define   MI_SEMAPHORE_SAD_GTE_SDD   (1 << 12)
> @@ -119,6 +120,7 @@
>  #define   MI_SEMAPHORE_SAD_LTE_SDD   (3 << 12)
>  #define   MI_SEMAPHORE_SAD_EQ_SDD(4 << 12)
>  #define   MI_SEMAPHORE_SAD_NEQ_SDD   (5 << 12)
> +#define   MI_SEMAPHORE_TOKEN_SHIFT   5

Do we need a mask too?

>  #define MI_STORE_DWORD_IMM   MI_INSTR(0x20, 1)
>  #define MI_STORE_DWORD_IMM_GEN4  MI_INSTR(0x20, 2)
>  #define   MI_MEM_VIRTUAL (1 << 22) /* 945,g33,965 */
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
> b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 64fa2db5905f..c74fc75e4980 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -3237,6 +3237,22 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct 
> i915_request *request, u32 *cs)
>   return gen8_emit_fini_breadcrumb_footer(request, cs);
>  }
>  
> +static u32 *
> +gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
> +{
> + cs = gen8_emit_ggtt_write_rcs(cs,
> +   request->fence.seqno,
> +   request->timeline->hwsp_offset,
> +   PIPE_CONTROL_CS_STALL |
> +   PIPE_CONTROL_TILE_CACHE_FLUSH |
> +   PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
> +   PIPE_CONTROL_DEPTH_CACHE_FLUSH |
> +   PIPE_CONTROL_DC_FLUSH_ENABLE |
> +   PIPE_CONTROL_FLUSH_ENABLE);
> +
> + return gen8_emit_fini_breadcrumb_footer(request, cs);
> +}
> +
>  /*
>   * Note that the CS instruction pre-parser will not stall on the breadcrumb
>   * flush and will continue pre-fetching the instructions after it before the
> @@ -3255,8 +3271,49 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct 
> i915_request *request, u32 *cs)
>   * All the above applies only to the instructions themselves. Non-inline data
>   * used by the instructions is not pre-fetched.
>   */
> -static u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *request,
> -u32 *cs)
> +
> +static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 
> *cs)
> +{
> + *cs++ = MI_SEMAPHORE_WAIT_TOKEN |
> + MI_SEMAPHORE_GLOBAL_GTT |
> + MI_SEMAPHORE_POLL |
> + MI_SEMAPHORE_SAD_EQ_SDD;
> + *cs++ = 0;
> + *cs++ = intel_hws_preempt_address(request->engine);

This is supposed to be in canonical form.

> + *cs++ = 0;
> + *cs++ = 0;
> + *cs++ = MI_NOOP;
> +
> + return cs;
> +}
> +
> +static __always_inline u32*
> +gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs)
> +{
> + *cs++ = MI_USER_INTERRUPT;
> +
> + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
> + if (intel_engine_has_semaphores(request->engine))
> + cs = gen12_emit_preempt_busywait(request, cs);
> +
> + request->tail = intel_ring_offset(request, cs);
> + assert_ring_tail_valid(request->ring, request->tail);
> +
> + return gen8_emit_wa_tail(request, cs);
> +}
> +
> +static u32 *gen12_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
> +{
> + cs = gen8_emit_ggtt_write(cs,
> +   request->fence.seqno,
> +   request->timeline->hwsp_offset,
> +   0);
> +
> + return gen12_emit_fini_breadcrumb_footer(request, cs);
> +}
> +
> +static u32 *
> +gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
>  {
>   cs = gen8_emit_ggtt_write_rcs(cs,
> 

[Intel-gfx] [PATCH] drm/i915/tgl: Extend MI_SEMAPHORE_WAIT

2019-09-13 Thread Chris Wilson
On Tigerlake, MI_SEMAPHORE_WAIT grew an extra dword, so be sure to
update the length field and emit that extra parameter and any padding
noop as required.

v2: Define the token shift while we are adding the updated MI_SEMAPHORE_WAIT

Signed-off-by: Chris Wilson 
Cc: Mika Kuoppala 
Cc: Daniele Ceraolo Spurio 
Cc: Michal Winiarski 
---
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h |  2 +
 drivers/gpu/drm/i915/gt/intel_lrc.c  | 71 ++--
 drivers/gpu/drm/i915/i915_pci.c  |  1 -
 drivers/gpu/drm/i915/i915_request.c  | 21 --
 4 files changed, 83 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h 
b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index fbad403ab7ac..da2025bc332c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -112,6 +112,7 @@
 #define MI_SEMAPHORE_SIGNALMI_INSTR(0x1b, 0) /* GEN8+ */
 #define   MI_SEMAPHORE_TARGET(engine)  ((engine)<<15)
 #define MI_SEMAPHORE_WAIT  MI_INSTR(0x1c, 2) /* GEN8+ */
+#define MI_SEMAPHORE_WAIT_TOKENMI_INSTR(0x1c, 3) /* GEN12+ */
 #define   MI_SEMAPHORE_POLL(1 << 15)
 #define   MI_SEMAPHORE_SAD_GT_SDD  (0 << 12)
 #define   MI_SEMAPHORE_SAD_GTE_SDD (1 << 12)
@@ -119,6 +120,7 @@
 #define   MI_SEMAPHORE_SAD_LTE_SDD (3 << 12)
 #define   MI_SEMAPHORE_SAD_EQ_SDD  (4 << 12)
 #define   MI_SEMAPHORE_SAD_NEQ_SDD (5 << 12)
+#define   MI_SEMAPHORE_TOKEN_SHIFT 5
 #define MI_STORE_DWORD_IMM MI_INSTR(0x20, 1)
 #define MI_STORE_DWORD_IMM_GEN4MI_INSTR(0x20, 2)
 #define   MI_MEM_VIRTUAL   (1 << 22) /* 945,g33,965 */
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 64fa2db5905f..c74fc75e4980 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -3237,6 +3237,22 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct 
i915_request *request, u32 *cs)
return gen8_emit_fini_breadcrumb_footer(request, cs);
 }
 
+static u32 *
+gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
+{
+   cs = gen8_emit_ggtt_write_rcs(cs,
+ request->fence.seqno,
+ request->timeline->hwsp_offset,
+ PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_TILE_CACHE_FLUSH |
+ PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ PIPE_CONTROL_DC_FLUSH_ENABLE |
+ PIPE_CONTROL_FLUSH_ENABLE);
+
+   return gen8_emit_fini_breadcrumb_footer(request, cs);
+}
+
 /*
  * Note that the CS instruction pre-parser will not stall on the breadcrumb
  * flush and will continue pre-fetching the instructions after it before the
@@ -3255,8 +3271,49 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct 
i915_request *request, u32 *cs)
  * All the above applies only to the instructions themselves. Non-inline data
  * used by the instructions is not pre-fetched.
  */
-static u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *request,
-  u32 *cs)
+
+static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs)
+{
+   *cs++ = MI_SEMAPHORE_WAIT_TOKEN |
+   MI_SEMAPHORE_GLOBAL_GTT |
+   MI_SEMAPHORE_POLL |
+   MI_SEMAPHORE_SAD_EQ_SDD;
+   *cs++ = 0;
+   *cs++ = intel_hws_preempt_address(request->engine);
+   *cs++ = 0;
+   *cs++ = 0;
+   *cs++ = MI_NOOP;
+
+   return cs;
+}
+
+static __always_inline u32*
+gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs)
+{
+   *cs++ = MI_USER_INTERRUPT;
+
+   *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+   if (intel_engine_has_semaphores(request->engine))
+   cs = gen12_emit_preempt_busywait(request, cs);
+
+   request->tail = intel_ring_offset(request, cs);
+   assert_ring_tail_valid(request->ring, request->tail);
+
+   return gen8_emit_wa_tail(request, cs);
+}
+
+static u32 *gen12_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
+{
+   cs = gen8_emit_ggtt_write(cs,
+ request->fence.seqno,
+ request->timeline->hwsp_offset,
+ 0);
+
+   return gen12_emit_fini_breadcrumb_footer(request, cs);
+}
+
+static u32 *
+gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
 {
cs = gen8_emit_ggtt_write_rcs(cs,
  request->fence.seqno,
@@ -3268,7 +3325,7 @@ static u32 *gen11_emit_fini_breadcrumb_rcs(struct 
i915_request *request,
  PIPE_CONTROL_DC_FLUSH_ENABLE |
  PIPE_CONTROL_FLUSH_ENABLE);
 

[Intel-gfx] [PATCH] drm/i915/tgl: Extend MI_SEMAPHORE_WAIT

2019-09-12 Thread Chris Wilson
On Tigerlake, MI_SEMAPHORE_WAIT grew an extra dword, so be sure to
update the length field and emit that extra parameter and any padding
noop as required.

Signed-off-by: Chris Wilson 
Cc: Mika Kuoppala 
Cc: Daniele Ceraolo Spurio 
---
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h |  1 +
 drivers/gpu/drm/i915/gt/intel_lrc.c  | 69 ++--
 drivers/gpu/drm/i915/i915_pci.c  |  1 -
 drivers/gpu/drm/i915/i915_request.c  | 21 --
 4 files changed, 81 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h 
b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index fbad403ab7ac..ad6ba92f4d65 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -112,6 +112,7 @@
 #define MI_SEMAPHORE_SIGNALMI_INSTR(0x1b, 0) /* GEN8+ */
 #define   MI_SEMAPHORE_TARGET(engine)  ((engine)<<15)
 #define MI_SEMAPHORE_WAIT  MI_INSTR(0x1c, 2) /* GEN8+ */
+#define MI_SEMAPHORE_WAIT_GEN12MI_INSTR(0x1c, 3) /* GEN12+ */
 #define   MI_SEMAPHORE_POLL(1 << 15)
 #define   MI_SEMAPHORE_SAD_GT_SDD  (0 << 12)
 #define   MI_SEMAPHORE_SAD_GTE_SDD (1 << 12)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
b/drivers/gpu/drm/i915/gt/intel_lrc.c
index a3f0e4999744..840ea473f9ad 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2879,6 +2879,22 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct 
i915_request *request, u32 *cs)
return gen8_emit_fini_breadcrumb_footer(request, cs);
 }
 
+static u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *request,
+  u32 *cs)
+{
+   cs = gen8_emit_ggtt_write_rcs(cs,
+ request->fence.seqno,
+ request->timeline->hwsp_offset,
+ PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_TILE_CACHE_FLUSH |
+ PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ PIPE_CONTROL_DC_FLUSH_ENABLE |
+ PIPE_CONTROL_FLUSH_ENABLE);
+
+   return gen8_emit_fini_breadcrumb_footer(request, cs);
+}
+
 /*
  * Note that the CS instruction pre-parser will not stall on the breadcrumb
  * flush and will continue pre-fetching the instructions after it before the
@@ -2897,7 +2913,48 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct 
i915_request *request, u32 *cs)
  * All the above applies only to the instructions themselves. Non-inline data
  * used by the instructions is not pre-fetched.
  */
-static u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *request,
+
+static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs)
+{
+   *cs++ = MI_SEMAPHORE_WAIT_GEN12 |
+   MI_SEMAPHORE_GLOBAL_GTT |
+   MI_SEMAPHORE_POLL |
+   MI_SEMAPHORE_SAD_EQ_SDD;
+   *cs++ = 0;
+   *cs++ = intel_hws_preempt_address(request->engine);
+   *cs++ = 0;
+   *cs++ = 0;
+   *cs++ = MI_NOOP;
+
+   return cs;
+}
+
+static __always_inline u32*
+gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs)
+{
+   *cs++ = MI_USER_INTERRUPT;
+
+   *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+   if (intel_engine_has_semaphores(request->engine))
+   cs = gen12_emit_preempt_busywait(request, cs);
+
+   request->tail = intel_ring_offset(request, cs);
+   assert_ring_tail_valid(request->ring, request->tail);
+
+   return gen8_emit_wa_tail(request, cs);
+}
+
+static u32 *gen12_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
+{
+   cs = gen8_emit_ggtt_write(cs,
+ request->fence.seqno,
+ request->timeline->hwsp_offset,
+ 0);
+
+   return gen12_emit_fini_breadcrumb_footer(request, cs);
+}
+
+static u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *request,
   u32 *cs)
 {
cs = gen8_emit_ggtt_write_rcs(cs,
@@ -2910,7 +2967,7 @@ static u32 *gen11_emit_fini_breadcrumb_rcs(struct 
i915_request *request,
  PIPE_CONTROL_DC_FLUSH_ENABLE |
  PIPE_CONTROL_FLUSH_ENABLE);
 
-   return gen8_emit_fini_breadcrumb_footer(request, cs);
+   return gen12_emit_fini_breadcrumb_footer(request, cs);
 }
 
 static void execlists_park(struct intel_engine_cs *engine)
@@ -2939,9 +2996,6 @@ void intel_execlists_set_default_submission(struct 
intel_engine_cs *engine)
engine->flags |= I915_ENGINE_HAS_PREEMPTION;
}
 
-   if (INTEL_GEN(engine->i915) >= 12) /* XXX disabled for debugging */
-   engine->flags &= ~I915_ENGINE_HAS_SEMAPHORES;
-