Re: [Mesa-dev] [PATCH 2/2] i965: Add ARB_fragment_shader_interlock support.

2018-05-10 Thread Francisco Jerez
Plamena Manolova  writes:

> Adds support for ARB_fragment_shader_interlock. We achieve
> the interlock and fragment ordering by issuing a memory fence
> via sendc.
>
> Signed-off-by: Plamena Manolova 

Reviewed-by: Francisco Jerez 

> ---
>  docs/features.txt|  2 +-
>  docs/relnotes/18.1.0.html|  1 +
>  src/intel/compiler/brw_eu.h  |  3 ++-
>  src/intel/compiler/brw_eu_defines.h  |  2 ++
>  src/intel/compiler/brw_eu_emit.c |  7 ---
>  src/intel/compiler/brw_fs_generator.cpp  |  7 ++-
>  src/intel/compiler/brw_fs_nir.cpp| 15 +++
>  src/intel/compiler/brw_shader.cpp|  4 
>  src/intel/compiler/brw_vec4_generator.cpp|  2 +-
>  src/mesa/drivers/dri/i965/intel_extensions.c |  1 +
>  10 files changed, 37 insertions(+), 7 deletions(-)
>
> diff --git a/docs/features.txt b/docs/features.txt
> index e786bbecf4..ed4050cf98 100644
> --- a/docs/features.txt
> +++ b/docs/features.txt
> @@ -300,7 +300,7 @@ Khronos, ARB, and OES extensions that are not part of any 
> OpenGL or OpenGL ES ve
>GL_ARB_cl_event   not started
>GL_ARB_compute_variable_group_sizeDONE (nvc0, radeonsi)
>GL_ARB_ES3_2_compatibilityDONE (i965/gen8+)
> -  GL_ARB_fragment_shader_interlock  not started
> +  GL_ARB_fragment_shader_interlock  DONE (i965)
>GL_ARB_gpu_shader_int64   DONE (i965/gen8+, 
> nvc0, radeonsi, softpipe, llvmpipe)
>GL_ARB_parallel_shader_compilenot started, but 
> Chia-I Wu did some related work in 2014
>GL_ARB_post_depth_coverageDONE (i965, nvc0)
> diff --git a/docs/relnotes/18.1.0.html b/docs/relnotes/18.1.0.html
> index deeb23db03..e6a1343c8b 100644
> --- a/docs/relnotes/18.1.0.html
> +++ b/docs/relnotes/18.1.0.html
> @@ -53,6 +53,7 @@ Note: some of the new features are only available with 
> certain drivers.
>  GL_EXT_shader_framebuffer_fetch_non_coherent on i965
>  GL_KHR_blend_equation_advanced on radeonsi
>  Disk shader cache support for i965 enabled by default
> +GL_ARB_fragment_shader_interlock on i965
>  
>  
>  Bug fixes
> diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h
> index 120a74f035..10c17e2fc6 100644
> --- a/src/intel/compiler/brw_eu.h
> +++ b/src/intel/compiler/brw_eu.h
> @@ -510,7 +510,8 @@ brw_byte_scattered_write(struct brw_codegen *p,
>  
>  void
>  brw_memory_fence(struct brw_codegen *p,
> - struct brw_reg dst);
> + struct brw_reg dst,
> + enum opcode send_op);
>  
>  void
>  brw_pixel_interpolator_query(struct brw_codegen *p,
> diff --git a/src/intel/compiler/brw_eu_defines.h 
> b/src/intel/compiler/brw_eu_defines.h
> index 332d627bc3..2980e98a58 100644
> --- a/src/intel/compiler/brw_eu_defines.h
> +++ b/src/intel/compiler/brw_eu_defines.h
> @@ -480,6 +480,8 @@ enum opcode {
>  
> SHADER_OPCODE_GET_BUFFER_SIZE,
>  
> +   SHADER_OPCODE_INTERLOCK,
> +
> VEC4_OPCODE_MOV_BYTES,
> VEC4_OPCODE_PACK_BYTES,
> VEC4_OPCODE_UNPACK_UNIFORM,
> diff --git a/src/intel/compiler/brw_eu_emit.c 
> b/src/intel/compiler/brw_eu_emit.c
> index ee5a048bca..6fdee1a1dc 100644
> --- a/src/intel/compiler/brw_eu_emit.c
> +++ b/src/intel/compiler/brw_eu_emit.c
> @@ -3288,7 +3288,8 @@ brw_set_memory_fence_message(struct brw_codegen *p,
>  
>  void
>  brw_memory_fence(struct brw_codegen *p,
> - struct brw_reg dst)
> + struct brw_reg dst,
> + enum opcode send_op)
>  {
> const struct gen_device_info *devinfo = p->devinfo;
> const bool commit_enable =
> @@ -3304,7 +3305,7 @@ brw_memory_fence(struct brw_codegen *p,
> /* Set dst as destination for dependency tracking, the MEMORY_FENCE
>  * message doesn't write anything back.
>  */
> -   insn = next_insn(p, BRW_OPCODE_SEND);
> +   insn = next_insn(p, send_op);
> dst = retype(dst, BRW_REGISTER_TYPE_UW);
> brw_set_dest(p, insn, dst);
> brw_set_src0(p, insn, dst);
> @@ -3316,7 +3317,7 @@ brw_memory_fence(struct brw_codegen *p,
> * flush it too.  Use a different register so both flushes can be
> * pipelined by the hardware.
> */
> -  insn = next_insn(p, BRW_OPCODE_SEND);
> +  insn = next_insn(p, send_op);
>brw_set_dest(p, insn, offset(dst, 1));
>brw_set_src0(p, insn, offset(dst, 1));
>brw_set_memory_fence_message(p, insn, GEN6_SFID_DATAPORT_RENDER_CACHE,
> diff --git a/src/intel/compiler/brw_fs_generator.cpp 
> b/src/intel/compiler/brw_fs_generator.cpp
> index 6d5306a0ee..f21115e34d 100644
> --- a/src/intel/compiler/brw_fs_generator.cpp
> +++ b/src/intel/compiler/brw_fs_generator.cpp
> @@ -2277,7 +2277,12 @@ fs_generator::generate_code(const cfg_t *cfg, int 
> dispatch_width)
>   break;
>  
>case SHADER

[Mesa-dev] [PATCH 2/2] i965: Add ARB_fragment_shader_interlock support.

2018-05-10 Thread Plamena Manolova
Adds support for ARB_fragment_shader_interlock. We achieve
the interlock and fragment ordering by issuing a memory fence
via sendc.

Signed-off-by: Plamena Manolova 
---
 docs/features.txt|  2 +-
 docs/relnotes/18.1.0.html|  1 +
 src/intel/compiler/brw_eu.h  |  3 ++-
 src/intel/compiler/brw_eu_defines.h  |  2 ++
 src/intel/compiler/brw_eu_emit.c |  7 ---
 src/intel/compiler/brw_fs_generator.cpp  |  7 ++-
 src/intel/compiler/brw_fs_nir.cpp| 15 +++
 src/intel/compiler/brw_shader.cpp|  4 
 src/intel/compiler/brw_vec4_generator.cpp|  2 +-
 src/mesa/drivers/dri/i965/intel_extensions.c |  1 +
 10 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/docs/features.txt b/docs/features.txt
index e786bbecf4..ed4050cf98 100644
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -300,7 +300,7 @@ Khronos, ARB, and OES extensions that are not part of any 
OpenGL or OpenGL ES ve
   GL_ARB_cl_event   not started
   GL_ARB_compute_variable_group_sizeDONE (nvc0, radeonsi)
   GL_ARB_ES3_2_compatibilityDONE (i965/gen8+)
-  GL_ARB_fragment_shader_interlock  not started
+  GL_ARB_fragment_shader_interlock  DONE (i965)
   GL_ARB_gpu_shader_int64   DONE (i965/gen8+, 
nvc0, radeonsi, softpipe, llvmpipe)
   GL_ARB_parallel_shader_compilenot started, but 
Chia-I Wu did some related work in 2014
   GL_ARB_post_depth_coverageDONE (i965, nvc0)
diff --git a/docs/relnotes/18.1.0.html b/docs/relnotes/18.1.0.html
index deeb23db03..e6a1343c8b 100644
--- a/docs/relnotes/18.1.0.html
+++ b/docs/relnotes/18.1.0.html
@@ -53,6 +53,7 @@ Note: some of the new features are only available with 
certain drivers.
 GL_EXT_shader_framebuffer_fetch_non_coherent on i965
 GL_KHR_blend_equation_advanced on radeonsi
 Disk shader cache support for i965 enabled by default
+GL_ARB_fragment_shader_interlock on i965
 
 
 Bug fixes
diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h
index 120a74f035..10c17e2fc6 100644
--- a/src/intel/compiler/brw_eu.h
+++ b/src/intel/compiler/brw_eu.h
@@ -510,7 +510,8 @@ brw_byte_scattered_write(struct brw_codegen *p,
 
 void
 brw_memory_fence(struct brw_codegen *p,
- struct brw_reg dst);
+ struct brw_reg dst,
+ enum opcode send_op);
 
 void
 brw_pixel_interpolator_query(struct brw_codegen *p,
diff --git a/src/intel/compiler/brw_eu_defines.h 
b/src/intel/compiler/brw_eu_defines.h
index 332d627bc3..2980e98a58 100644
--- a/src/intel/compiler/brw_eu_defines.h
+++ b/src/intel/compiler/brw_eu_defines.h
@@ -480,6 +480,8 @@ enum opcode {
 
SHADER_OPCODE_GET_BUFFER_SIZE,
 
+   SHADER_OPCODE_INTERLOCK,
+
VEC4_OPCODE_MOV_BYTES,
VEC4_OPCODE_PACK_BYTES,
VEC4_OPCODE_UNPACK_UNIFORM,
diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c
index ee5a048bca..6fdee1a1dc 100644
--- a/src/intel/compiler/brw_eu_emit.c
+++ b/src/intel/compiler/brw_eu_emit.c
@@ -3288,7 +3288,8 @@ brw_set_memory_fence_message(struct brw_codegen *p,
 
 void
 brw_memory_fence(struct brw_codegen *p,
- struct brw_reg dst)
+ struct brw_reg dst,
+ enum opcode send_op)
 {
const struct gen_device_info *devinfo = p->devinfo;
const bool commit_enable =
@@ -3304,7 +3305,7 @@ brw_memory_fence(struct brw_codegen *p,
/* Set dst as destination for dependency tracking, the MEMORY_FENCE
 * message doesn't write anything back.
 */
-   insn = next_insn(p, BRW_OPCODE_SEND);
+   insn = next_insn(p, send_op);
dst = retype(dst, BRW_REGISTER_TYPE_UW);
brw_set_dest(p, insn, dst);
brw_set_src0(p, insn, dst);
@@ -3316,7 +3317,7 @@ brw_memory_fence(struct brw_codegen *p,
* flush it too.  Use a different register so both flushes can be
* pipelined by the hardware.
*/
-  insn = next_insn(p, BRW_OPCODE_SEND);
+  insn = next_insn(p, send_op);
   brw_set_dest(p, insn, offset(dst, 1));
   brw_set_src0(p, insn, offset(dst, 1));
   brw_set_memory_fence_message(p, insn, GEN6_SFID_DATAPORT_RENDER_CACHE,
diff --git a/src/intel/compiler/brw_fs_generator.cpp 
b/src/intel/compiler/brw_fs_generator.cpp
index 6d5306a0ee..f21115e34d 100644
--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@@ -2277,7 +2277,12 @@ fs_generator::generate_code(const cfg_t *cfg, int 
dispatch_width)
  break;
 
   case SHADER_OPCODE_MEMORY_FENCE:
- brw_memory_fence(p, dst);
+ brw_memory_fence(p, dst, BRW_OPCODE_SEND);
+ break;
+
+  case SHADER_OPCODE_INTERLOCK:
+ /* The interlock is basically a memory fence issued via sendc */
+ brw_memory_fence(p, dst, BRW_OPCODE_SENDC)

Re: [Mesa-dev] [PATCH 2/2] i965: Add ARB_fragment_shader_interlock support.

2018-04-09 Thread Plamena Manolova
Thank you so much for reviewing these patches Curro!
I'll make the suggested changes and resubmit.

On Thu, 5 Apr 2018, 19:25 Francisco Jerez,  wrote:

> Plamena Manolova  writes:
>
> > Adds support for ARB_fragment_shader_interlock. We achieve
> > the interlock and fragment ordering by issuing a memory fence
> > via sendc.
> >
> > Signed-off-by: Plamena Manolova 
> > ---
> >  docs/features.txt|  2 +-
> >  docs/relnotes/18.1.0.html|  1 +
> >  src/intel/compiler/brw_eu.h  |  3 ++-
> >  src/intel/compiler/brw_eu_defines.h  |  2 ++
> >  src/intel/compiler/brw_eu_emit.c |  7 ---
> >  src/intel/compiler/brw_fs_generator.cpp  |  7 ++-
> >  src/intel/compiler/brw_fs_nir.cpp| 15 +++
> >  src/intel/compiler/brw_shader.cpp|  4 
> >  src/intel/compiler/brw_vec4_generator.cpp|  2 +-
> >  src/mesa/drivers/dri/i965/intel_extensions.c |  1 +
> >  10 files changed, 37 insertions(+), 7 deletions(-)
> >
> > diff --git a/docs/features.txt b/docs/features.txt
> > index 5eae34bf0d..a621251efd 100644
> > --- a/docs/features.txt
> > +++ b/docs/features.txt
> > @@ -297,7 +297,7 @@ Khronos, ARB, and OES extensions that are not part
> of any OpenGL or OpenGL ES ve
> >GL_ARB_cl_event   not started
> >GL_ARB_compute_variable_group_sizeDONE (nvc0,
> radeonsi)
> >GL_ARB_ES3_2_compatibilityDONE
> (i965/gen8+)
> > -  GL_ARB_fragment_shader_interlock  not started
> > +  GL_ARB_fragment_shader_interlock  DONE (i965)
> >GL_ARB_gpu_shader_int64   DONE
> (i965/gen8+, nvc0, radeonsi, softpipe, llvmpipe)
> >GL_ARB_parallel_shader_compilenot started,
> but Chia-I Wu did some related work in 2014
> >GL_ARB_post_depth_coverageDONE (i965)
> > diff --git a/docs/relnotes/18.1.0.html b/docs/relnotes/18.1.0.html
> > index 1d5201717f..9d8e63855d 100644
> > --- a/docs/relnotes/18.1.0.html
> > +++ b/docs/relnotes/18.1.0.html
> > @@ -51,6 +51,7 @@ Note: some of the new features are only available with
> certain drivers.
> >  GL_EXT_shader_framebuffer_fetch on i965 on desktop GL (GLES was
> already supported)
> >  GL_EXT_shader_framebuffer_fetch_non_coherent on i965
> >  Disk shader cache support for i965 enabled by default
> > +GL_ARB_fragment_shader_interlock on i965
> >  
> >
> >  Bug fixes
> > diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h
> > index ca72666a55..b2c36d3ea1 100644
> > --- a/src/intel/compiler/brw_eu.h
> > +++ b/src/intel/compiler/brw_eu.h
> > @@ -509,7 +509,8 @@ brw_byte_scattered_write(struct brw_codegen *p,
> >
> >  void
> >  brw_memory_fence(struct brw_codegen *p,
> > - struct brw_reg dst);
> > + struct brw_reg dst,
> > + uint32_t send_op);
> >
>
> The new argument should probably be of type "enum opcode" in order to
> avoid losing type information.
>
> >  void
> >  brw_pixel_interpolator_query(struct brw_codegen *p,
> > diff --git a/src/intel/compiler/brw_eu_defines.h
> b/src/intel/compiler/brw_eu_defines.h
> > index 332d627bc3..2980e98a58 100644
> > --- a/src/intel/compiler/brw_eu_defines.h
> > +++ b/src/intel/compiler/brw_eu_defines.h
> > @@ -480,6 +480,8 @@ enum opcode {
> >
> > SHADER_OPCODE_GET_BUFFER_SIZE,
> >
> > +   SHADER_OPCODE_INTERLOCK,
> > +
> > VEC4_OPCODE_MOV_BYTES,
> > VEC4_OPCODE_PACK_BYTES,
> > VEC4_OPCODE_UNPACK_UNIFORM,
> > diff --git a/src/intel/compiler/brw_eu_emit.c
> b/src/intel/compiler/brw_eu_emit.c
> > index f039af56d0..6a57397a41 100644
> > --- a/src/intel/compiler/brw_eu_emit.c
> > +++ b/src/intel/compiler/brw_eu_emit.c
> > @@ -3285,7 +3285,8 @@ brw_set_memory_fence_message(struct brw_codegen *p,
> >
> >  void
> >  brw_memory_fence(struct brw_codegen *p,
> > - struct brw_reg dst)
> > + struct brw_reg dst,
> > + uint32_t send_op)
> >  {
> > const struct gen_device_info *devinfo = p->devinfo;
> > const bool commit_enable =
> > @@ -3301,7 +3302,7 @@ brw_memory_fence(struct brw_codegen *p,
> > /* Set dst as destination for dependency tracking, the MEMORY_FENCE
> >  * message doesn't write anything back.
> >  */
> > -   insn = next_insn(p, BRW_OPCODE_SEND);
> > +   insn = next_insn(p, send_op);
> > dst = retype(dst, BRW_REGISTER_TYPE_UW);
> > brw_set_dest(p, insn, dst);
> > brw_set_src0(p, insn, dst);
> > @@ -3313,7 +3314,7 @@ brw_memory_fence(struct brw_codegen *p,
> > * flush it too.  Use a different register so both flushes can be
> > * pipelined by the hardware.
> > */
> > -  insn = next_insn(p, BRW_OPCODE_SEND);
> > +  insn = next_insn(p, send_op);
> >brw_set_dest(p, insn, offset(dst, 1));
> >brw_set_src0(p, insn, offset(dst, 1));
>

[Mesa-dev] [PATCH 2/2] i965: Add ARB_fragment_shader_interlock support.

2018-04-06 Thread Plamena Manolova
Adds support for ARB_fragment_shader_interlock. We achieve
the interlock and fragment ordering by issuing a memory fence
via sendc.

Signed-off-by: Plamena Manolova 
---
 docs/features.txt|  2 +-
 docs/relnotes/18.1.0.html|  1 +
 src/intel/compiler/brw_eu.h  |  3 ++-
 src/intel/compiler/brw_eu_defines.h  |  2 ++
 src/intel/compiler/brw_eu_emit.c |  7 ---
 src/intel/compiler/brw_fs_generator.cpp  |  7 ++-
 src/intel/compiler/brw_fs_nir.cpp| 15 +++
 src/intel/compiler/brw_shader.cpp|  4 
 src/intel/compiler/brw_vec4_generator.cpp|  2 +-
 src/mesa/drivers/dri/i965/intel_extensions.c |  1 +
 10 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/docs/features.txt b/docs/features.txt
index 5eae34bf0d..a621251efd 100644
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -297,7 +297,7 @@ Khronos, ARB, and OES extensions that are not part of any 
OpenGL or OpenGL ES ve
   GL_ARB_cl_event   not started
   GL_ARB_compute_variable_group_sizeDONE (nvc0, radeonsi)
   GL_ARB_ES3_2_compatibilityDONE (i965/gen8+)
-  GL_ARB_fragment_shader_interlock  not started
+  GL_ARB_fragment_shader_interlock  DONE (i965)
   GL_ARB_gpu_shader_int64   DONE (i965/gen8+, 
nvc0, radeonsi, softpipe, llvmpipe)
   GL_ARB_parallel_shader_compilenot started, but 
Chia-I Wu did some related work in 2014
   GL_ARB_post_depth_coverageDONE (i965)
diff --git a/docs/relnotes/18.1.0.html b/docs/relnotes/18.1.0.html
index 1d5201717f..9d8e63855d 100644
--- a/docs/relnotes/18.1.0.html
+++ b/docs/relnotes/18.1.0.html
@@ -51,6 +51,7 @@ Note: some of the new features are only available with 
certain drivers.
 GL_EXT_shader_framebuffer_fetch on i965 on desktop GL (GLES was already 
supported)
 GL_EXT_shader_framebuffer_fetch_non_coherent on i965
 Disk shader cache support for i965 enabled by default
+GL_ARB_fragment_shader_interlock on i965
 
 
 Bug fixes
diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h
index ca72666a55..b2c36d3ea1 100644
--- a/src/intel/compiler/brw_eu.h
+++ b/src/intel/compiler/brw_eu.h
@@ -509,7 +509,8 @@ brw_byte_scattered_write(struct brw_codegen *p,
 
 void
 brw_memory_fence(struct brw_codegen *p,
- struct brw_reg dst);
+ struct brw_reg dst,
+ uint32_t send_op);
 
 void
 brw_pixel_interpolator_query(struct brw_codegen *p,
diff --git a/src/intel/compiler/brw_eu_defines.h 
b/src/intel/compiler/brw_eu_defines.h
index 332d627bc3..2980e98a58 100644
--- a/src/intel/compiler/brw_eu_defines.h
+++ b/src/intel/compiler/brw_eu_defines.h
@@ -480,6 +480,8 @@ enum opcode {
 
SHADER_OPCODE_GET_BUFFER_SIZE,
 
+   SHADER_OPCODE_INTERLOCK,
+
VEC4_OPCODE_MOV_BYTES,
VEC4_OPCODE_PACK_BYTES,
VEC4_OPCODE_UNPACK_UNIFORM,
diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c
index f039af56d0..6a57397a41 100644
--- a/src/intel/compiler/brw_eu_emit.c
+++ b/src/intel/compiler/brw_eu_emit.c
@@ -3285,7 +3285,8 @@ brw_set_memory_fence_message(struct brw_codegen *p,
 
 void
 brw_memory_fence(struct brw_codegen *p,
- struct brw_reg dst)
+ struct brw_reg dst,
+ uint32_t send_op)
 {
const struct gen_device_info *devinfo = p->devinfo;
const bool commit_enable =
@@ -3301,7 +3302,7 @@ brw_memory_fence(struct brw_codegen *p,
/* Set dst as destination for dependency tracking, the MEMORY_FENCE
 * message doesn't write anything back.
 */
-   insn = next_insn(p, BRW_OPCODE_SEND);
+   insn = next_insn(p, send_op);
dst = retype(dst, BRW_REGISTER_TYPE_UW);
brw_set_dest(p, insn, dst);
brw_set_src0(p, insn, dst);
@@ -3313,7 +3314,7 @@ brw_memory_fence(struct brw_codegen *p,
* flush it too.  Use a different register so both flushes can be
* pipelined by the hardware.
*/
-  insn = next_insn(p, BRW_OPCODE_SEND);
+  insn = next_insn(p, send_op);
   brw_set_dest(p, insn, offset(dst, 1));
   brw_set_src0(p, insn, offset(dst, 1));
   brw_set_memory_fence_message(p, insn, GEN6_SFID_DATAPORT_RENDER_CACHE,
diff --git a/src/intel/compiler/brw_fs_generator.cpp 
b/src/intel/compiler/brw_fs_generator.cpp
index 0c85eb8e1e..f099d092d1 100644
--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@@ -2277,7 +2277,12 @@ fs_generator::generate_code(const cfg_t *cfg, int 
dispatch_width)
  break;
 
   case SHADER_OPCODE_MEMORY_FENCE:
- brw_memory_fence(p, dst);
+ brw_memory_fence(p, dst, BRW_OPCODE_SEND);
+ break;
+
+  case SHADER_OPCODE_INTERLOCK:
+ /* The interlock is basically a memory fence issued via sendc */
+ brw_memory_fe

Re: [Mesa-dev] [PATCH 2/2] i965: Add ARB_fragment_shader_interlock support.

2018-04-05 Thread Francisco Jerez
Plamena Manolova  writes:

> Adds support for ARB_fragment_shader_interlock. We achieve
> the interlock and fragment ordering by issuing a memory fence
> via sendc.
>
> Signed-off-by: Plamena Manolova 
> ---
>  docs/features.txt|  2 +-
>  docs/relnotes/18.1.0.html|  1 +
>  src/intel/compiler/brw_eu.h  |  3 ++-
>  src/intel/compiler/brw_eu_defines.h  |  2 ++
>  src/intel/compiler/brw_eu_emit.c |  7 ---
>  src/intel/compiler/brw_fs_generator.cpp  |  7 ++-
>  src/intel/compiler/brw_fs_nir.cpp| 15 +++
>  src/intel/compiler/brw_shader.cpp|  4 
>  src/intel/compiler/brw_vec4_generator.cpp|  2 +-
>  src/mesa/drivers/dri/i965/intel_extensions.c |  1 +
>  10 files changed, 37 insertions(+), 7 deletions(-)
>
> diff --git a/docs/features.txt b/docs/features.txt
> index 5eae34bf0d..a621251efd 100644
> --- a/docs/features.txt
> +++ b/docs/features.txt
> @@ -297,7 +297,7 @@ Khronos, ARB, and OES extensions that are not part of any 
> OpenGL or OpenGL ES ve
>GL_ARB_cl_event   not started
>GL_ARB_compute_variable_group_sizeDONE (nvc0, radeonsi)
>GL_ARB_ES3_2_compatibilityDONE (i965/gen8+)
> -  GL_ARB_fragment_shader_interlock  not started
> +  GL_ARB_fragment_shader_interlock  DONE (i965)
>GL_ARB_gpu_shader_int64   DONE (i965/gen8+, 
> nvc0, radeonsi, softpipe, llvmpipe)
>GL_ARB_parallel_shader_compilenot started, but 
> Chia-I Wu did some related work in 2014
>GL_ARB_post_depth_coverageDONE (i965)
> diff --git a/docs/relnotes/18.1.0.html b/docs/relnotes/18.1.0.html
> index 1d5201717f..9d8e63855d 100644
> --- a/docs/relnotes/18.1.0.html
> +++ b/docs/relnotes/18.1.0.html
> @@ -51,6 +51,7 @@ Note: some of the new features are only available with 
> certain drivers.
>  GL_EXT_shader_framebuffer_fetch on i965 on desktop GL (GLES was already 
> supported)
>  GL_EXT_shader_framebuffer_fetch_non_coherent on i965
>  Disk shader cache support for i965 enabled by default
> +GL_ARB_fragment_shader_interlock on i965
>  
>  
>  Bug fixes
> diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h
> index ca72666a55..b2c36d3ea1 100644
> --- a/src/intel/compiler/brw_eu.h
> +++ b/src/intel/compiler/brw_eu.h
> @@ -509,7 +509,8 @@ brw_byte_scattered_write(struct brw_codegen *p,
>  
>  void
>  brw_memory_fence(struct brw_codegen *p,
> - struct brw_reg dst);
> + struct brw_reg dst,
> + uint32_t send_op);
>

The new argument should probably be of type "enum opcode" in order to
avoid losing type information.

>  void
>  brw_pixel_interpolator_query(struct brw_codegen *p,
> diff --git a/src/intel/compiler/brw_eu_defines.h 
> b/src/intel/compiler/brw_eu_defines.h
> index 332d627bc3..2980e98a58 100644
> --- a/src/intel/compiler/brw_eu_defines.h
> +++ b/src/intel/compiler/brw_eu_defines.h
> @@ -480,6 +480,8 @@ enum opcode {
>  
> SHADER_OPCODE_GET_BUFFER_SIZE,
>  
> +   SHADER_OPCODE_INTERLOCK,
> +
> VEC4_OPCODE_MOV_BYTES,
> VEC4_OPCODE_PACK_BYTES,
> VEC4_OPCODE_UNPACK_UNIFORM,
> diff --git a/src/intel/compiler/brw_eu_emit.c 
> b/src/intel/compiler/brw_eu_emit.c
> index f039af56d0..6a57397a41 100644
> --- a/src/intel/compiler/brw_eu_emit.c
> +++ b/src/intel/compiler/brw_eu_emit.c
> @@ -3285,7 +3285,8 @@ brw_set_memory_fence_message(struct brw_codegen *p,
>  
>  void
>  brw_memory_fence(struct brw_codegen *p,
> - struct brw_reg dst)
> + struct brw_reg dst,
> + uint32_t send_op)
>  {
> const struct gen_device_info *devinfo = p->devinfo;
> const bool commit_enable =
> @@ -3301,7 +3302,7 @@ brw_memory_fence(struct brw_codegen *p,
> /* Set dst as destination for dependency tracking, the MEMORY_FENCE
>  * message doesn't write anything back.
>  */
> -   insn = next_insn(p, BRW_OPCODE_SEND);
> +   insn = next_insn(p, send_op);
> dst = retype(dst, BRW_REGISTER_TYPE_UW);
> brw_set_dest(p, insn, dst);
> brw_set_src0(p, insn, dst);
> @@ -3313,7 +3314,7 @@ brw_memory_fence(struct brw_codegen *p,
> * flush it too.  Use a different register so both flushes can be
> * pipelined by the hardware.
> */
> -  insn = next_insn(p, BRW_OPCODE_SEND);
> +  insn = next_insn(p, send_op);
>brw_set_dest(p, insn, offset(dst, 1));
>brw_set_src0(p, insn, offset(dst, 1));
>brw_set_memory_fence_message(p, insn, GEN6_SFID_DATAPORT_RENDER_CACHE,
> diff --git a/src/intel/compiler/brw_fs_generator.cpp 
> b/src/intel/compiler/brw_fs_generator.cpp
> index 0c85eb8e1e..f099d092d1 100644
> --- a/src/intel/compiler/brw_fs_generator.cpp
> +++ b/src/intel/compiler/brw_fs_generator.cpp
> @@ -2277,7 +2277,12 @@ fs_generator

Re: [Mesa-dev] [PATCH 2/2] i965: Add ARB_fragment_shader_interlock support.

2017-04-19 Thread Manolova, Plamena
Hi Francisco,
Thank you for reviewing!

On Wed, Apr 19, 2017 at 4:18 PM, Francisco Jerez 
wrote:

> Hi Pam, looks good overall, a couple of comments below,
>
> Plamena Manolova  writes:
>
> > Adds support for ARB_fragment_shader_interlock. We achieve
> > the interlock and fragment ordering by issuing a memory fence
> > via sendc.
> >
> > Signed-off-by: Plamena Manolova 
> > ---
> >  docs/features.txt|  2 +-
> >  docs/relnotes/17.1.0.html|  1 +
> >  src/intel/compiler/brw_eu.h  |  4 +++
> >  src/intel/compiler/brw_eu_defines.h  |  2 ++
> >  src/intel/compiler/brw_eu_emit.c | 47
> 
> >  src/intel/compiler/brw_fs_generator.cpp  |  4 +++
> >  src/intel/compiler/brw_fs_nir.cpp| 15 +
> >  src/intel/compiler/brw_shader.cpp|  4 +++
> >  src/mesa/drivers/dri/i965/intel_extensions.c |  5 +++
> >  9 files changed, 83 insertions(+), 1 deletion(-)
> >
> > diff --git a/docs/features.txt b/docs/features.txt
> > index 5f63632..a6237c0 100644
> > --- a/docs/features.txt
> > +++ b/docs/features.txt
> > @@ -281,7 +281,7 @@ Khronos, ARB, and OES extensions that are not part
> of any OpenGL or OpenGL ES ve
> >GL_ARB_cl_event   not started
> >GL_ARB_compute_variable_group_sizeDONE (nvc0,
> radeonsi)
> >GL_ARB_ES3_2_compatibilityDONE
> (i965/gen8+)
> > -  GL_ARB_fragment_shader_interlock  not started
> > +  GL_ARB_fragment_shader_interlock  DONE (i965)
> >GL_ARB_gl_spirv   not started
> >GL_ARB_gpu_shader_int64   DONE
> (i965/gen8+, nvc0, radeonsi, softpipe, llvmpipe)
> >GL_ARB_indirect_parametersDONE (nvc0,
> radeonsi)
> > diff --git a/docs/relnotes/17.1.0.html b/docs/relnotes/17.1.0.html
> > index e7cfe38..1b2393f 100644
> > --- a/docs/relnotes/17.1.0.html
> > +++ b/docs/relnotes/17.1.0.html
> > @@ -45,6 +45,7 @@ Note: some of the new features are only available with
> certain drivers.
> >
> >  
> >  OpenGL 4.2 on i965/ivb
> > +GL_ARB_fragment_shader_interlock on i965
> >  GL_ARB_gpu_shader_fp64 on i965/ivybridge
> >  GL_ARB_gpu_shader_int64 on i965/gen8+, nvc0, radeonsi, softpipe,
> llvmpipe
> >  GL_ARB_shader_ballot on nvc0, radeonsi
> > diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h
> > index f422595..117cfae 100644
> > --- a/src/intel/compiler/brw_eu.h
> > +++ b/src/intel/compiler/brw_eu.h
> > @@ -480,6 +480,10 @@ brw_memory_fence(struct brw_codegen *p,
> >   struct brw_reg dst);
> >
> >  void
> > +brw_interlock(struct brw_codegen *p,
> > +  struct brw_reg dst);
> > +
> > +void
> >  brw_pixel_interpolator_query(struct brw_codegen *p,
> >   struct brw_reg dest,
> >   struct brw_reg mrf,
> > diff --git a/src/intel/compiler/brw_eu_defines.h
> b/src/intel/compiler/brw_eu_defines.h
> > index 13a70f6..9eb5210 100644
> > --- a/src/intel/compiler/brw_eu_defines.h
> > +++ b/src/intel/compiler/brw_eu_defines.h
> > @@ -444,6 +444,8 @@ enum opcode {
> >  */
> > SHADER_OPCODE_BROADCAST,
> >
> > +   SHADER_OPCODE_INTERLOCK,
> > +
> > VEC4_OPCODE_MOV_BYTES,
> > VEC4_OPCODE_PACK_BYTES,
> > VEC4_OPCODE_UNPACK_UNIFORM,
> > diff --git a/src/intel/compiler/brw_eu_emit.c
> b/src/intel/compiler/brw_eu_emit.c
> > index 231d6fd..52adf22 100644
> > --- a/src/intel/compiler/brw_eu_emit.c
> > +++ b/src/intel/compiler/brw_eu_emit.c
> > @@ -3403,6 +3403,53 @@ brw_memory_fence(struct brw_codegen *p,
> >  }
> >
> >  void
> > +brw_interlock(struct brw_codegen *p,
> > +  struct brw_reg dst)
> > +{
> > +   const struct gen_device_info *devinfo = p->devinfo;
> > +   const bool commit_enable = devinfo->gen == 7 && !devinfo->is_haswell;
> > +   struct brw_inst *insn;
> > +
> > +   brw_push_insn_state(p);
> > +   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
> > +   brw_set_default_exec_size(p, BRW_EXECUTE_1);
> > +   dst = vec1(dst);
> > +
> > +   /* Set dst as destination for dependency tracking, the MEMORY_FENCE
> > +* message doesn't write anything back.
> > +*/
> > +   /* BRW_OPCODE_SENDC is what the interlock actually depends on */
> > +   insn = next_insn(p, BRW_OPCODE_SENDC);
> > +   dst = retype(dst, BRW_REGISTER_TYPE_UW);
> > +   brw_set_dest(p, insn, dst);
> > +   brw_set_src0(p, insn, dst);
> > +   /* Issuing a memory fence ensures the ordering of fragments */
> > +   brw_set_memory_fence_message(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE,
> > +commit_enable);
> > +
> > +   if (devinfo->gen == 7 && !devinfo->is_haswell) {
> > +  /* IVB does typed surface access through the render cache, so we
> need to
> > +   * flush it too.  Use a different register so both flushes can be
> > +  

Re: [Mesa-dev] [PATCH 2/2] i965: Add ARB_fragment_shader_interlock support.

2017-04-19 Thread Francisco Jerez
Hi Pam, looks good overall, a couple of comments below,

Plamena Manolova  writes:

> Adds support for ARB_fragment_shader_interlock. We achieve
> the interlock and fragment ordering by issuing a memory fence
> via sendc.
>
> Signed-off-by: Plamena Manolova 
> ---
>  docs/features.txt|  2 +-
>  docs/relnotes/17.1.0.html|  1 +
>  src/intel/compiler/brw_eu.h  |  4 +++
>  src/intel/compiler/brw_eu_defines.h  |  2 ++
>  src/intel/compiler/brw_eu_emit.c | 47 
> 
>  src/intel/compiler/brw_fs_generator.cpp  |  4 +++
>  src/intel/compiler/brw_fs_nir.cpp| 15 +
>  src/intel/compiler/brw_shader.cpp|  4 +++
>  src/mesa/drivers/dri/i965/intel_extensions.c |  5 +++
>  9 files changed, 83 insertions(+), 1 deletion(-)
>
> diff --git a/docs/features.txt b/docs/features.txt
> index 5f63632..a6237c0 100644
> --- a/docs/features.txt
> +++ b/docs/features.txt
> @@ -281,7 +281,7 @@ Khronos, ARB, and OES extensions that are not part of any 
> OpenGL or OpenGL ES ve
>GL_ARB_cl_event   not started
>GL_ARB_compute_variable_group_sizeDONE (nvc0, radeonsi)
>GL_ARB_ES3_2_compatibilityDONE (i965/gen8+)
> -  GL_ARB_fragment_shader_interlock  not started
> +  GL_ARB_fragment_shader_interlock  DONE (i965)
>GL_ARB_gl_spirv   not started
>GL_ARB_gpu_shader_int64   DONE (i965/gen8+, 
> nvc0, radeonsi, softpipe, llvmpipe)
>GL_ARB_indirect_parametersDONE (nvc0, radeonsi)
> diff --git a/docs/relnotes/17.1.0.html b/docs/relnotes/17.1.0.html
> index e7cfe38..1b2393f 100644
> --- a/docs/relnotes/17.1.0.html
> +++ b/docs/relnotes/17.1.0.html
> @@ -45,6 +45,7 @@ Note: some of the new features are only available with 
> certain drivers.
>  
>  
>  OpenGL 4.2 on i965/ivb
> +GL_ARB_fragment_shader_interlock on i965
>  GL_ARB_gpu_shader_fp64 on i965/ivybridge
>  GL_ARB_gpu_shader_int64 on i965/gen8+, nvc0, radeonsi, softpipe, 
> llvmpipe
>  GL_ARB_shader_ballot on nvc0, radeonsi
> diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h
> index f422595..117cfae 100644
> --- a/src/intel/compiler/brw_eu.h
> +++ b/src/intel/compiler/brw_eu.h
> @@ -480,6 +480,10 @@ brw_memory_fence(struct brw_codegen *p,
>   struct brw_reg dst);
>  
>  void
> +brw_interlock(struct brw_codegen *p,
> +  struct brw_reg dst);
> +
> +void
>  brw_pixel_interpolator_query(struct brw_codegen *p,
>   struct brw_reg dest,
>   struct brw_reg mrf,
> diff --git a/src/intel/compiler/brw_eu_defines.h 
> b/src/intel/compiler/brw_eu_defines.h
> index 13a70f6..9eb5210 100644
> --- a/src/intel/compiler/brw_eu_defines.h
> +++ b/src/intel/compiler/brw_eu_defines.h
> @@ -444,6 +444,8 @@ enum opcode {
>  */
> SHADER_OPCODE_BROADCAST,
>  
> +   SHADER_OPCODE_INTERLOCK,
> +
> VEC4_OPCODE_MOV_BYTES,
> VEC4_OPCODE_PACK_BYTES,
> VEC4_OPCODE_UNPACK_UNIFORM,
> diff --git a/src/intel/compiler/brw_eu_emit.c 
> b/src/intel/compiler/brw_eu_emit.c
> index 231d6fd..52adf22 100644
> --- a/src/intel/compiler/brw_eu_emit.c
> +++ b/src/intel/compiler/brw_eu_emit.c
> @@ -3403,6 +3403,53 @@ brw_memory_fence(struct brw_codegen *p,
>  }
>  
>  void
> +brw_interlock(struct brw_codegen *p,
> +  struct brw_reg dst)
> +{
> +   const struct gen_device_info *devinfo = p->devinfo;
> +   const bool commit_enable = devinfo->gen == 7 && !devinfo->is_haswell;
> +   struct brw_inst *insn;
> +
> +   brw_push_insn_state(p);
> +   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
> +   brw_set_default_exec_size(p, BRW_EXECUTE_1);
> +   dst = vec1(dst);
> +
> +   /* Set dst as destination for dependency tracking, the MEMORY_FENCE
> +* message doesn't write anything back.
> +*/
> +   /* BRW_OPCODE_SENDC is what the interlock actually depends on */
> +   insn = next_insn(p, BRW_OPCODE_SENDC);
> +   dst = retype(dst, BRW_REGISTER_TYPE_UW);
> +   brw_set_dest(p, insn, dst);
> +   brw_set_src0(p, insn, dst);
> +   /* Issuing a memory fence ensures the ordering of fragments */
> +   brw_set_memory_fence_message(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE,
> +commit_enable);
> +
> +   if (devinfo->gen == 7 && !devinfo->is_haswell) {
> +  /* IVB does typed surface access through the render cache, so we need 
> to
> +   * flush it too.  Use a different register so both flushes can be
> +   * pipelined by the hardware.
> +   */
> +  insn = next_insn(p, BRW_OPCODE_SENDC);
> +  brw_set_dest(p, insn, offset(dst, 1));
> +  brw_set_src0(p, insn, offset(dst, 1));
> +  brw_set_memory_fence_message(p, insn, GEN6_SFID_DATAPORT_RENDER_CACHE,
> +

Re: [Mesa-dev] [PATCH 2/2] i965: Add ARB_fragment_shader_interlock support.

2017-04-17 Thread Boyan Ding
2017-04-18 9:25 GMT+08:00 Plamena Manolova :
> Adds support for ARB_fragment_shader_interlock. We achieve
> the interlock and fragment ordering by issuing a memory fence
> via sendc.
>
> Signed-off-by: Plamena Manolova 
> ---
>  docs/features.txt|  2 +-
>  docs/relnotes/17.1.0.html|  1 +
>  src/intel/compiler/brw_eu.h  |  4 +++
>  src/intel/compiler/brw_eu_defines.h  |  2 ++
>  src/intel/compiler/brw_eu_emit.c | 47 
> 
>  src/intel/compiler/brw_fs_generator.cpp  |  4 +++
>  src/intel/compiler/brw_fs_nir.cpp| 15 +
>  src/intel/compiler/brw_shader.cpp|  4 +++
>  src/mesa/drivers/dri/i965/intel_extensions.c |  5 +++
>  9 files changed, 83 insertions(+), 1 deletion(-)
>
> diff --git a/docs/features.txt b/docs/features.txt
> index 5f63632..a6237c0 100644
> --- a/docs/features.txt
> +++ b/docs/features.txt
> @@ -281,7 +281,7 @@ Khronos, ARB, and OES extensions that are not part of any 
> OpenGL or OpenGL ES ve
>GL_ARB_cl_event   not started
>GL_ARB_compute_variable_group_sizeDONE (nvc0, radeonsi)
>GL_ARB_ES3_2_compatibilityDONE (i965/gen8+)
> -  GL_ARB_fragment_shader_interlock  not started
> +  GL_ARB_fragment_shader_interlock  DONE (i965)
>GL_ARB_gl_spirv   not started
>GL_ARB_gpu_shader_int64   DONE (i965/gen8+, 
> nvc0, radeonsi, softpipe, llvmpipe)
>GL_ARB_indirect_parametersDONE (nvc0, radeonsi)
> diff --git a/docs/relnotes/17.1.0.html b/docs/relnotes/17.1.0.html
> index e7cfe38..1b2393f 100644
> --- a/docs/relnotes/17.1.0.html
> +++ b/docs/relnotes/17.1.0.html
> @@ -45,6 +45,7 @@ Note: some of the new features are only available with 
> certain drivers.
>
>  
>  OpenGL 4.2 on i965/ivb
> +GL_ARB_fragment_shader_interlock on i965

This should go into 17.2.0 instead.

Cheers,
Boyan Ding

>  GL_ARB_gpu_shader_fp64 on i965/ivybridge
>  GL_ARB_gpu_shader_int64 on i965/gen8+, nvc0, radeonsi, softpipe, 
> llvmpipe
>  GL_ARB_shader_ballot on nvc0, radeonsi
> diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h
> index f422595..117cfae 100644
> --- a/src/intel/compiler/brw_eu.h
> +++ b/src/intel/compiler/brw_eu.h
> @@ -480,6 +480,10 @@ brw_memory_fence(struct brw_codegen *p,
>   struct brw_reg dst);
>
>  void
> +brw_interlock(struct brw_codegen *p,
> +  struct brw_reg dst);
> +
> +void
>  brw_pixel_interpolator_query(struct brw_codegen *p,
>   struct brw_reg dest,
>   struct brw_reg mrf,
> diff --git a/src/intel/compiler/brw_eu_defines.h 
> b/src/intel/compiler/brw_eu_defines.h
> index 13a70f6..9eb5210 100644
> --- a/src/intel/compiler/brw_eu_defines.h
> +++ b/src/intel/compiler/brw_eu_defines.h
> @@ -444,6 +444,8 @@ enum opcode {
>  */
> SHADER_OPCODE_BROADCAST,
>
> +   SHADER_OPCODE_INTERLOCK,
> +
> VEC4_OPCODE_MOV_BYTES,
> VEC4_OPCODE_PACK_BYTES,
> VEC4_OPCODE_UNPACK_UNIFORM,
> diff --git a/src/intel/compiler/brw_eu_emit.c 
> b/src/intel/compiler/brw_eu_emit.c
> index 231d6fd..52adf22 100644
> --- a/src/intel/compiler/brw_eu_emit.c
> +++ b/src/intel/compiler/brw_eu_emit.c
> @@ -3403,6 +3403,53 @@ brw_memory_fence(struct brw_codegen *p,
>  }
>
>  void
> +brw_interlock(struct brw_codegen *p,
> +  struct brw_reg dst)
> +{
> +   const struct gen_device_info *devinfo = p->devinfo;
> +   const bool commit_enable = devinfo->gen == 7 && !devinfo->is_haswell;
> +   struct brw_inst *insn;
> +
> +   brw_push_insn_state(p);
> +   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
> +   brw_set_default_exec_size(p, BRW_EXECUTE_1);
> +   dst = vec1(dst);
> +
> +   /* Set dst as destination for dependency tracking, the MEMORY_FENCE
> +* message doesn't write anything back.
> +*/
> +   /* BRW_OPCODE_SENDC is what the interlock actually depends on */
> +   insn = next_insn(p, BRW_OPCODE_SENDC);
> +   dst = retype(dst, BRW_REGISTER_TYPE_UW);
> +   brw_set_dest(p, insn, dst);
> +   brw_set_src0(p, insn, dst);
> +   /* Issuing a memory fence ensures the ordering of fragments */
> +   brw_set_memory_fence_message(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE,
> +commit_enable);
> +
> +   if (devinfo->gen == 7 && !devinfo->is_haswell) {
> +  /* IVB does typed surface access through the render cache, so we need to
> +   * flush it too.  Use a different register so both flushes can be
> +   * pipelined by the hardware.
> +   */
> +  insn = next_insn(p, BRW_OPCODE_SENDC);
> +  brw_set_dest(p, insn, offset(dst, 1));
> +  brw_set_src0(p, insn, offset(dst, 1));
> +  brw_set_memory_fence_message(p, insn, GEN6_SFID_DATAPORT_RENDER_CACHE,
> + 

[Mesa-dev] [PATCH 2/2] i965: Add ARB_fragment_shader_interlock support.

2017-04-17 Thread Plamena Manolova
Adds support for ARB_fragment_shader_interlock. We achieve
the interlock and fragment ordering by issuing a memory fence
via sendc.

Signed-off-by: Plamena Manolova 
---
 docs/features.txt|  2 +-
 docs/relnotes/17.1.0.html|  1 +
 src/intel/compiler/brw_eu.h  |  4 +++
 src/intel/compiler/brw_eu_defines.h  |  2 ++
 src/intel/compiler/brw_eu_emit.c | 47 
 src/intel/compiler/brw_fs_generator.cpp  |  4 +++
 src/intel/compiler/brw_fs_nir.cpp| 15 +
 src/intel/compiler/brw_shader.cpp|  4 +++
 src/mesa/drivers/dri/i965/intel_extensions.c |  5 +++
 9 files changed, 83 insertions(+), 1 deletion(-)

diff --git a/docs/features.txt b/docs/features.txt
index 5f63632..a6237c0 100644
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -281,7 +281,7 @@ Khronos, ARB, and OES extensions that are not part of any 
OpenGL or OpenGL ES ve
   GL_ARB_cl_event   not started
   GL_ARB_compute_variable_group_sizeDONE (nvc0, radeonsi)
   GL_ARB_ES3_2_compatibilityDONE (i965/gen8+)
-  GL_ARB_fragment_shader_interlock  not started
+  GL_ARB_fragment_shader_interlock  DONE (i965)
   GL_ARB_gl_spirv   not started
   GL_ARB_gpu_shader_int64   DONE (i965/gen8+, 
nvc0, radeonsi, softpipe, llvmpipe)
   GL_ARB_indirect_parametersDONE (nvc0, radeonsi)
diff --git a/docs/relnotes/17.1.0.html b/docs/relnotes/17.1.0.html
index e7cfe38..1b2393f 100644
--- a/docs/relnotes/17.1.0.html
+++ b/docs/relnotes/17.1.0.html
@@ -45,6 +45,7 @@ Note: some of the new features are only available with 
certain drivers.
 
 
 OpenGL 4.2 on i965/ivb
+GL_ARB_fragment_shader_interlock on i965
 GL_ARB_gpu_shader_fp64 on i965/ivybridge
 GL_ARB_gpu_shader_int64 on i965/gen8+, nvc0, radeonsi, softpipe, 
llvmpipe
 GL_ARB_shader_ballot on nvc0, radeonsi
diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h
index f422595..117cfae 100644
--- a/src/intel/compiler/brw_eu.h
+++ b/src/intel/compiler/brw_eu.h
@@ -480,6 +480,10 @@ brw_memory_fence(struct brw_codegen *p,
  struct brw_reg dst);
 
 void
+brw_interlock(struct brw_codegen *p,
+  struct brw_reg dst);
+
+void
 brw_pixel_interpolator_query(struct brw_codegen *p,
  struct brw_reg dest,
  struct brw_reg mrf,
diff --git a/src/intel/compiler/brw_eu_defines.h 
b/src/intel/compiler/brw_eu_defines.h
index 13a70f6..9eb5210 100644
--- a/src/intel/compiler/brw_eu_defines.h
+++ b/src/intel/compiler/brw_eu_defines.h
@@ -444,6 +444,8 @@ enum opcode {
 */
SHADER_OPCODE_BROADCAST,
 
+   SHADER_OPCODE_INTERLOCK,
+
VEC4_OPCODE_MOV_BYTES,
VEC4_OPCODE_PACK_BYTES,
VEC4_OPCODE_UNPACK_UNIFORM,
diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c
index 231d6fd..52adf22 100644
--- a/src/intel/compiler/brw_eu_emit.c
+++ b/src/intel/compiler/brw_eu_emit.c
@@ -3403,6 +3403,53 @@ brw_memory_fence(struct brw_codegen *p,
 }
 
 void
+brw_interlock(struct brw_codegen *p,
+  struct brw_reg dst)
+{
+   const struct gen_device_info *devinfo = p->devinfo;
+   const bool commit_enable = devinfo->gen == 7 && !devinfo->is_haswell;
+   struct brw_inst *insn;
+
+   brw_push_insn_state(p);
+   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+   brw_set_default_exec_size(p, BRW_EXECUTE_1);
+   dst = vec1(dst);
+
+   /* Set dst as destination for dependency tracking, the MEMORY_FENCE
+* message doesn't write anything back.
+*/
+   /* BRW_OPCODE_SENDC is what the interlock actually depends on */
+   insn = next_insn(p, BRW_OPCODE_SENDC);
+   dst = retype(dst, BRW_REGISTER_TYPE_UW);
+   brw_set_dest(p, insn, dst);
+   brw_set_src0(p, insn, dst);
+   /* Issuing a memory fence ensures the ordering of fragments */
+   brw_set_memory_fence_message(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE,
+commit_enable);
+
+   if (devinfo->gen == 7 && !devinfo->is_haswell) {
+  /* IVB does typed surface access through the render cache, so we need to
+   * flush it too.  Use a different register so both flushes can be
+   * pipelined by the hardware.
+   */
+  insn = next_insn(p, BRW_OPCODE_SENDC);
+  brw_set_dest(p, insn, offset(dst, 1));
+  brw_set_src0(p, insn, offset(dst, 1));
+  brw_set_memory_fence_message(p, insn, GEN6_SFID_DATAPORT_RENDER_CACHE,
+   commit_enable);
+
+  /* Now write the response of the second message into the response of the
+   * first to trigger a pipeline stall -- This way future render and data
+   * cache messages will be properly ordered with respect to past data and
+   * render cache messages.
+   */
+  brw_MOV(p, dst