Re: [Mesa-dev] [PATCH 2/2] i965: Add ARB_fragment_shader_interlock support.
Plamena Manolova writes: > Adds suppport for ARB_fragment_shader_interlock. We achieve > the interlock and fragment ordering by issuing a memory fence > via sendc. > > Signed-off-by: Plamena Manolova Reviewed-by: Francisco Jerez > --- > docs/features.txt| 2 +- > docs/relnotes/18.1.0.html| 1 + > src/intel/compiler/brw_eu.h | 3 ++- > src/intel/compiler/brw_eu_defines.h | 2 ++ > src/intel/compiler/brw_eu_emit.c | 7 --- > src/intel/compiler/brw_fs_generator.cpp | 7 ++- > src/intel/compiler/brw_fs_nir.cpp| 15 +++ > src/intel/compiler/brw_shader.cpp| 4 > src/intel/compiler/brw_vec4_generator.cpp| 2 +- > src/mesa/drivers/dri/i965/intel_extensions.c | 1 + > 10 files changed, 37 insertions(+), 7 deletions(-) > > diff --git a/docs/features.txt b/docs/features.txt > index e786bbecf4..ed4050cf98 100644 > --- a/docs/features.txt > +++ b/docs/features.txt > @@ -300,7 +300,7 @@ Khronos, ARB, and OES extensions that are not part of any > OpenGL or OpenGL ES ve >GL_ARB_cl_event not started >GL_ARB_compute_variable_group_sizeDONE (nvc0, radeonsi) >GL_ARB_ES3_2_compatibilityDONE (i965/gen8+) > - GL_ARB_fragment_shader_interlock not started > + GL_ARB_fragment_shader_interlock DONE (i965) >GL_ARB_gpu_shader_int64 DONE (i965/gen8+, > nvc0, radeonsi, softpipe, llvmpipe) >GL_ARB_parallel_shader_compilenot started, but > Chia-I Wu did some related work in 2014 >GL_ARB_post_depth_coverageDONE (i965, nvc0) > diff --git a/docs/relnotes/18.1.0.html b/docs/relnotes/18.1.0.html > index deeb23db03..e6a1343c8b 100644 > --- a/docs/relnotes/18.1.0.html > +++ b/docs/relnotes/18.1.0.html > @@ -53,6 +53,7 @@ Note: some of the new features are only available with > certain drivers. 
> GL_EXT_shader_framebuffer_fetch_non_coherent on i965 > GL_KHR_blend_equation_advanced on radeonsi > Disk shader cache support for i965 enabled by default > +GL_ARB_fragment_shader_interlock on i965 > > > Bug fixes > diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h > index 120a74f035..10c17e2fc6 100644 > --- a/src/intel/compiler/brw_eu.h > +++ b/src/intel/compiler/brw_eu.h > @@ -510,7 +510,8 @@ brw_byte_scattered_write(struct brw_codegen *p, > > void > brw_memory_fence(struct brw_codegen *p, > - struct brw_reg dst); > + struct brw_reg dst, > + enum opcode send_op); > > void > brw_pixel_interpolator_query(struct brw_codegen *p, > diff --git a/src/intel/compiler/brw_eu_defines.h > b/src/intel/compiler/brw_eu_defines.h > index 332d627bc3..2980e98a58 100644 > --- a/src/intel/compiler/brw_eu_defines.h > +++ b/src/intel/compiler/brw_eu_defines.h > @@ -480,6 +480,8 @@ enum opcode { > > SHADER_OPCODE_GET_BUFFER_SIZE, > > + SHADER_OPCODE_INTERLOCK, > + > VEC4_OPCODE_MOV_BYTES, > VEC4_OPCODE_PACK_BYTES, > VEC4_OPCODE_UNPACK_UNIFORM, > diff --git a/src/intel/compiler/brw_eu_emit.c > b/src/intel/compiler/brw_eu_emit.c > index ee5a048bca..6fdee1a1dc 100644 > --- a/src/intel/compiler/brw_eu_emit.c > +++ b/src/intel/compiler/brw_eu_emit.c > @@ -3288,7 +3288,8 @@ brw_set_memory_fence_message(struct brw_codegen *p, > > void > brw_memory_fence(struct brw_codegen *p, > - struct brw_reg dst) > + struct brw_reg dst, > + enum opcode send_op) > { > const struct gen_device_info *devinfo = p->devinfo; > const bool commit_enable = > @@ -3304,7 +3305,7 @@ brw_memory_fence(struct brw_codegen *p, > /* Set dst as destination for dependency tracking, the MEMORY_FENCE > * message doesn't write anything back. > */ > - insn = next_insn(p, BRW_OPCODE_SEND); > + insn = next_insn(p, send_op); > dst = retype(dst, BRW_REGISTER_TYPE_UW); > brw_set_dest(p, insn, dst); > brw_set_src0(p, insn, dst); > @@ -3316,7 +3317,7 @@ brw_memory_fence(struct brw_codegen *p, > * flush it too. 
Use a different register so both flushes can be > * pipelined by the hardware. > */ > - insn = next_insn(p, BRW_OPCODE_SEND); > + insn = next_insn(p, send_op); >brw_set_dest(p, insn, offset(dst, 1)); >brw_set_src0(p, insn, offset(dst, 1)); >brw_set_memory_fence_message(p, insn, GEN6_SFID_DATAPORT_RENDER_CACHE, > diff --git a/src/intel/compiler/brw_fs_generator.cpp > b/src/intel/compiler/brw_fs_generator.cpp > index 6d5306a0ee..f21115e34d 100644 > --- a/src/intel/compiler/brw_fs_generator.cpp > +++ b/src/intel/compiler/brw_fs_generator.cpp > @@ -2277,7 +2277,12 @@ fs_generator::generate_code(const cfg_t *cfg, int > dispatch_width) > break; > >case SHADER
[Mesa-dev] [PATCH 2/2] i965: Add ARB_fragment_shader_interlock support.
Adds support for ARB_fragment_shader_interlock. We achieve the interlock and fragment ordering by issuing a memory fence via sendc. Signed-off-by: Plamena Manolova --- docs/features.txt| 2 +- docs/relnotes/18.1.0.html| 1 + src/intel/compiler/brw_eu.h | 3 ++- src/intel/compiler/brw_eu_defines.h | 2 ++ src/intel/compiler/brw_eu_emit.c | 7 --- src/intel/compiler/brw_fs_generator.cpp | 7 ++- src/intel/compiler/brw_fs_nir.cpp| 15 +++ src/intel/compiler/brw_shader.cpp| 4 src/intel/compiler/brw_vec4_generator.cpp| 2 +- src/mesa/drivers/dri/i965/intel_extensions.c | 1 + 10 files changed, 37 insertions(+), 7 deletions(-) diff --git a/docs/features.txt b/docs/features.txt index e786bbecf4..ed4050cf98 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -300,7 +300,7 @@ Khronos, ARB, and OES extensions that are not part of any OpenGL or OpenGL ES ve GL_ARB_cl_event not started GL_ARB_compute_variable_group_sizeDONE (nvc0, radeonsi) GL_ARB_ES3_2_compatibilityDONE (i965/gen8+) - GL_ARB_fragment_shader_interlock not started + GL_ARB_fragment_shader_interlock DONE (i965) GL_ARB_gpu_shader_int64 DONE (i965/gen8+, nvc0, radeonsi, softpipe, llvmpipe) GL_ARB_parallel_shader_compilenot started, but Chia-I Wu did some related work in 2014 GL_ARB_post_depth_coverageDONE (i965, nvc0) diff --git a/docs/relnotes/18.1.0.html b/docs/relnotes/18.1.0.html index deeb23db03..e6a1343c8b 100644 --- a/docs/relnotes/18.1.0.html +++ b/docs/relnotes/18.1.0.html @@ -53,6 +53,7 @@ Note: some of the new features are only available with certain drivers. 
GL_EXT_shader_framebuffer_fetch_non_coherent on i965 GL_KHR_blend_equation_advanced on radeonsi Disk shader cache support for i965 enabled by default +GL_ARB_fragment_shader_interlock on i965 Bug fixes diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h index 120a74f035..10c17e2fc6 100644 --- a/src/intel/compiler/brw_eu.h +++ b/src/intel/compiler/brw_eu.h @@ -510,7 +510,8 @@ brw_byte_scattered_write(struct brw_codegen *p, void brw_memory_fence(struct brw_codegen *p, - struct brw_reg dst); + struct brw_reg dst, + enum opcode send_op); void brw_pixel_interpolator_query(struct brw_codegen *p, diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index 332d627bc3..2980e98a58 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -480,6 +480,8 @@ enum opcode { SHADER_OPCODE_GET_BUFFER_SIZE, + SHADER_OPCODE_INTERLOCK, + VEC4_OPCODE_MOV_BYTES, VEC4_OPCODE_PACK_BYTES, VEC4_OPCODE_UNPACK_UNIFORM, diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index ee5a048bca..6fdee1a1dc 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -3288,7 +3288,8 @@ brw_set_memory_fence_message(struct brw_codegen *p, void brw_memory_fence(struct brw_codegen *p, - struct brw_reg dst) + struct brw_reg dst, + enum opcode send_op) { const struct gen_device_info *devinfo = p->devinfo; const bool commit_enable = @@ -3304,7 +3305,7 @@ brw_memory_fence(struct brw_codegen *p, /* Set dst as destination for dependency tracking, the MEMORY_FENCE * message doesn't write anything back. */ - insn = next_insn(p, BRW_OPCODE_SEND); + insn = next_insn(p, send_op); dst = retype(dst, BRW_REGISTER_TYPE_UW); brw_set_dest(p, insn, dst); brw_set_src0(p, insn, dst); @@ -3316,7 +3317,7 @@ brw_memory_fence(struct brw_codegen *p, * flush it too. Use a different register so both flushes can be * pipelined by the hardware. 
*/ - insn = next_insn(p, BRW_OPCODE_SEND); + insn = next_insn(p, send_op); brw_set_dest(p, insn, offset(dst, 1)); brw_set_src0(p, insn, offset(dst, 1)); brw_set_memory_fence_message(p, insn, GEN6_SFID_DATAPORT_RENDER_CACHE, diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index 6d5306a0ee..f21115e34d 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -2277,7 +2277,12 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) break; case SHADER_OPCODE_MEMORY_FENCE: - brw_memory_fence(p, dst); + brw_memory_fence(p, dst, BRW_OPCODE_SEND); + break; + + case SHADER_OPCODE_INTERLOCK: + /* The interlock is basically a memory fence issued via sendc */ + brw_memory_fence(p, dst, BRW_OPCODE_SENDC)
Re: [Mesa-dev] [PATCH 2/2] i965: Add ARB_fragment_shader_interlock support.
Thank you so much for reviewing these patches Curro! I'll make the suggested changes and resubmit. On Thu, 5 Apr 2018, 19:25 Francisco Jerez, wrote: > Plamena Manolova writes: > > > Adds suppport for ARB_fragment_shader_interlock. We achieve > > the interlock and fragment ordering by issuing a memory fence > > via sendc. > > > > Signed-off-by: Plamena Manolova > > --- > > docs/features.txt| 2 +- > > docs/relnotes/18.1.0.html| 1 + > > src/intel/compiler/brw_eu.h | 3 ++- > > src/intel/compiler/brw_eu_defines.h | 2 ++ > > src/intel/compiler/brw_eu_emit.c | 7 --- > > src/intel/compiler/brw_fs_generator.cpp | 7 ++- > > src/intel/compiler/brw_fs_nir.cpp| 15 +++ > > src/intel/compiler/brw_shader.cpp| 4 > > src/intel/compiler/brw_vec4_generator.cpp| 2 +- > > src/mesa/drivers/dri/i965/intel_extensions.c | 1 + > > 10 files changed, 37 insertions(+), 7 deletions(-) > > > > diff --git a/docs/features.txt b/docs/features.txt > > index 5eae34bf0d..a621251efd 100644 > > --- a/docs/features.txt > > +++ b/docs/features.txt > > @@ -297,7 +297,7 @@ Khronos, ARB, and OES extensions that are not part > of any OpenGL or OpenGL ES ve > >GL_ARB_cl_event not started > >GL_ARB_compute_variable_group_sizeDONE (nvc0, > radeonsi) > >GL_ARB_ES3_2_compatibilityDONE > (i965/gen8+) > > - GL_ARB_fragment_shader_interlock not started > > + GL_ARB_fragment_shader_interlock DONE (i965) > >GL_ARB_gpu_shader_int64 DONE > (i965/gen8+, nvc0, radeonsi, softpipe, llvmpipe) > >GL_ARB_parallel_shader_compilenot started, > but Chia-I Wu did some related work in 2014 > >GL_ARB_post_depth_coverageDONE (i965) > > diff --git a/docs/relnotes/18.1.0.html b/docs/relnotes/18.1.0.html > > index 1d5201717f..9d8e63855d 100644 > > --- a/docs/relnotes/18.1.0.html > > +++ b/docs/relnotes/18.1.0.html > > @@ -51,6 +51,7 @@ Note: some of the new features are only available with > certain drivers. 
> > GL_EXT_shader_framebuffer_fetch on i965 on desktop GL (GLES was > already supported) > > GL_EXT_shader_framebuffer_fetch_non_coherent on i965 > > Disk shader cache support for i965 enabled by default > > +GL_ARB_fragment_shader_interlock on i965 > > > > > > Bug fixes > > diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h > > index ca72666a55..b2c36d3ea1 100644 > > --- a/src/intel/compiler/brw_eu.h > > +++ b/src/intel/compiler/brw_eu.h > > @@ -509,7 +509,8 @@ brw_byte_scattered_write(struct brw_codegen *p, > > > > void > > brw_memory_fence(struct brw_codegen *p, > > - struct brw_reg dst); > > + struct brw_reg dst, > > + uint32_t send_op); > > > > The new argument should probably be of type "enum opcode" in order to > avoid losing type information. > > > void > > brw_pixel_interpolator_query(struct brw_codegen *p, > > diff --git a/src/intel/compiler/brw_eu_defines.h > b/src/intel/compiler/brw_eu_defines.h > > index 332d627bc3..2980e98a58 100644 > > --- a/src/intel/compiler/brw_eu_defines.h > > +++ b/src/intel/compiler/brw_eu_defines.h > > @@ -480,6 +480,8 @@ enum opcode { > > > > SHADER_OPCODE_GET_BUFFER_SIZE, > > > > + SHADER_OPCODE_INTERLOCK, > > + > > VEC4_OPCODE_MOV_BYTES, > > VEC4_OPCODE_PACK_BYTES, > > VEC4_OPCODE_UNPACK_UNIFORM, > > diff --git a/src/intel/compiler/brw_eu_emit.c > b/src/intel/compiler/brw_eu_emit.c > > index f039af56d0..6a57397a41 100644 > > --- a/src/intel/compiler/brw_eu_emit.c > > +++ b/src/intel/compiler/brw_eu_emit.c > > @@ -3285,7 +3285,8 @@ brw_set_memory_fence_message(struct brw_codegen *p, > > > > void > > brw_memory_fence(struct brw_codegen *p, > > - struct brw_reg dst) > > + struct brw_reg dst, > > + uint32_t send_op) > > { > > const struct gen_device_info *devinfo = p->devinfo; > > const bool commit_enable = > > @@ -3301,7 +3302,7 @@ brw_memory_fence(struct brw_codegen *p, > > /* Set dst as destination for dependency tracking, the MEMORY_FENCE > > * message doesn't write anything back. 
> > */ > > - insn = next_insn(p, BRW_OPCODE_SEND); > > + insn = next_insn(p, send_op); > > dst = retype(dst, BRW_REGISTER_TYPE_UW); > > brw_set_dest(p, insn, dst); > > brw_set_src0(p, insn, dst); > > @@ -3313,7 +3314,7 @@ brw_memory_fence(struct brw_codegen *p, > > * flush it too. Use a different register so both flushes can be > > * pipelined by the hardware. > > */ > > - insn = next_insn(p, BRW_OPCODE_SEND); > > + insn = next_insn(p, send_op); > >brw_set_dest(p, insn, offset(dst, 1)); > >brw_set_src0(p, insn, offset(dst, 1)); >
[Mesa-dev] [PATCH 2/2] i965: Add ARB_fragment_shader_interlock support.
Adds support for ARB_fragment_shader_interlock. We achieve the interlock and fragment ordering by issuing a memory fence via sendc. Signed-off-by: Plamena Manolova --- docs/features.txt| 2 +- docs/relnotes/18.1.0.html| 1 + src/intel/compiler/brw_eu.h | 3 ++- src/intel/compiler/brw_eu_defines.h | 2 ++ src/intel/compiler/brw_eu_emit.c | 7 --- src/intel/compiler/brw_fs_generator.cpp | 7 ++- src/intel/compiler/brw_fs_nir.cpp| 15 +++ src/intel/compiler/brw_shader.cpp| 4 src/intel/compiler/brw_vec4_generator.cpp| 2 +- src/mesa/drivers/dri/i965/intel_extensions.c | 1 + 10 files changed, 37 insertions(+), 7 deletions(-) diff --git a/docs/features.txt b/docs/features.txt index 5eae34bf0d..a621251efd 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -297,7 +297,7 @@ Khronos, ARB, and OES extensions that are not part of any OpenGL or OpenGL ES ve GL_ARB_cl_event not started GL_ARB_compute_variable_group_sizeDONE (nvc0, radeonsi) GL_ARB_ES3_2_compatibilityDONE (i965/gen8+) - GL_ARB_fragment_shader_interlock not started + GL_ARB_fragment_shader_interlock DONE (i965) GL_ARB_gpu_shader_int64 DONE (i965/gen8+, nvc0, radeonsi, softpipe, llvmpipe) GL_ARB_parallel_shader_compilenot started, but Chia-I Wu did some related work in 2014 GL_ARB_post_depth_coverageDONE (i965) diff --git a/docs/relnotes/18.1.0.html b/docs/relnotes/18.1.0.html index 1d5201717f..9d8e63855d 100644 --- a/docs/relnotes/18.1.0.html +++ b/docs/relnotes/18.1.0.html @@ -51,6 +51,7 @@ Note: some of the new features are only available with certain drivers. 
GL_EXT_shader_framebuffer_fetch on i965 on desktop GL (GLES was already supported) GL_EXT_shader_framebuffer_fetch_non_coherent on i965 Disk shader cache support for i965 enabled by default +GL_ARB_fragment_shader_interlock on i965 Bug fixes diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h index ca72666a55..b2c36d3ea1 100644 --- a/src/intel/compiler/brw_eu.h +++ b/src/intel/compiler/brw_eu.h @@ -509,7 +509,8 @@ brw_byte_scattered_write(struct brw_codegen *p, void brw_memory_fence(struct brw_codegen *p, - struct brw_reg dst); + struct brw_reg dst, + uint32_t send_op); void brw_pixel_interpolator_query(struct brw_codegen *p, diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index 332d627bc3..2980e98a58 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -480,6 +480,8 @@ enum opcode { SHADER_OPCODE_GET_BUFFER_SIZE, + SHADER_OPCODE_INTERLOCK, + VEC4_OPCODE_MOV_BYTES, VEC4_OPCODE_PACK_BYTES, VEC4_OPCODE_UNPACK_UNIFORM, diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index f039af56d0..6a57397a41 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -3285,7 +3285,8 @@ brw_set_memory_fence_message(struct brw_codegen *p, void brw_memory_fence(struct brw_codegen *p, - struct brw_reg dst) + struct brw_reg dst, + uint32_t send_op) { const struct gen_device_info *devinfo = p->devinfo; const bool commit_enable = @@ -3301,7 +3302,7 @@ brw_memory_fence(struct brw_codegen *p, /* Set dst as destination for dependency tracking, the MEMORY_FENCE * message doesn't write anything back. */ - insn = next_insn(p, BRW_OPCODE_SEND); + insn = next_insn(p, send_op); dst = retype(dst, BRW_REGISTER_TYPE_UW); brw_set_dest(p, insn, dst); brw_set_src0(p, insn, dst); @@ -3313,7 +3314,7 @@ brw_memory_fence(struct brw_codegen *p, * flush it too. Use a different register so both flushes can be * pipelined by the hardware. 
*/ - insn = next_insn(p, BRW_OPCODE_SEND); + insn = next_insn(p, send_op); brw_set_dest(p, insn, offset(dst, 1)); brw_set_src0(p, insn, offset(dst, 1)); brw_set_memory_fence_message(p, insn, GEN6_SFID_DATAPORT_RENDER_CACHE, diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index 0c85eb8e1e..f099d092d1 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -2277,7 +2277,12 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) break; case SHADER_OPCODE_MEMORY_FENCE: - brw_memory_fence(p, dst); + brw_memory_fence(p, dst, BRW_OPCODE_SEND); + break; + + case SHADER_OPCODE_INTERLOCK: + /* The interlock is basically a memory fence issued via sendc */ + brw_memory_fe
Re: [Mesa-dev] [PATCH 2/2] i965: Add ARB_fragment_shader_interlock support.
Plamena Manolova writes: > Adds suppport for ARB_fragment_shader_interlock. We achieve > the interlock and fragment ordering by issuing a memory fence > via sendc. > > Signed-off-by: Plamena Manolova > --- > docs/features.txt| 2 +- > docs/relnotes/18.1.0.html| 1 + > src/intel/compiler/brw_eu.h | 3 ++- > src/intel/compiler/brw_eu_defines.h | 2 ++ > src/intel/compiler/brw_eu_emit.c | 7 --- > src/intel/compiler/brw_fs_generator.cpp | 7 ++- > src/intel/compiler/brw_fs_nir.cpp| 15 +++ > src/intel/compiler/brw_shader.cpp| 4 > src/intel/compiler/brw_vec4_generator.cpp| 2 +- > src/mesa/drivers/dri/i965/intel_extensions.c | 1 + > 10 files changed, 37 insertions(+), 7 deletions(-) > > diff --git a/docs/features.txt b/docs/features.txt > index 5eae34bf0d..a621251efd 100644 > --- a/docs/features.txt > +++ b/docs/features.txt > @@ -297,7 +297,7 @@ Khronos, ARB, and OES extensions that are not part of any > OpenGL or OpenGL ES ve >GL_ARB_cl_event not started >GL_ARB_compute_variable_group_sizeDONE (nvc0, radeonsi) >GL_ARB_ES3_2_compatibilityDONE (i965/gen8+) > - GL_ARB_fragment_shader_interlock not started > + GL_ARB_fragment_shader_interlock DONE (i965) >GL_ARB_gpu_shader_int64 DONE (i965/gen8+, > nvc0, radeonsi, softpipe, llvmpipe) >GL_ARB_parallel_shader_compilenot started, but > Chia-I Wu did some related work in 2014 >GL_ARB_post_depth_coverageDONE (i965) > diff --git a/docs/relnotes/18.1.0.html b/docs/relnotes/18.1.0.html > index 1d5201717f..9d8e63855d 100644 > --- a/docs/relnotes/18.1.0.html > +++ b/docs/relnotes/18.1.0.html > @@ -51,6 +51,7 @@ Note: some of the new features are only available with > certain drivers. 
> GL_EXT_shader_framebuffer_fetch on i965 on desktop GL (GLES was already > supported) > GL_EXT_shader_framebuffer_fetch_non_coherent on i965 > Disk shader cache support for i965 enabled by default > +GL_ARB_fragment_shader_interlock on i965 > > > Bug fixes > diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h > index ca72666a55..b2c36d3ea1 100644 > --- a/src/intel/compiler/brw_eu.h > +++ b/src/intel/compiler/brw_eu.h > @@ -509,7 +509,8 @@ brw_byte_scattered_write(struct brw_codegen *p, > > void > brw_memory_fence(struct brw_codegen *p, > - struct brw_reg dst); > + struct brw_reg dst, > + uint32_t send_op); > The new argument should probably be of type "enum opcode" in order to avoid losing type information. > void > brw_pixel_interpolator_query(struct brw_codegen *p, > diff --git a/src/intel/compiler/brw_eu_defines.h > b/src/intel/compiler/brw_eu_defines.h > index 332d627bc3..2980e98a58 100644 > --- a/src/intel/compiler/brw_eu_defines.h > +++ b/src/intel/compiler/brw_eu_defines.h > @@ -480,6 +480,8 @@ enum opcode { > > SHADER_OPCODE_GET_BUFFER_SIZE, > > + SHADER_OPCODE_INTERLOCK, > + > VEC4_OPCODE_MOV_BYTES, > VEC4_OPCODE_PACK_BYTES, > VEC4_OPCODE_UNPACK_UNIFORM, > diff --git a/src/intel/compiler/brw_eu_emit.c > b/src/intel/compiler/brw_eu_emit.c > index f039af56d0..6a57397a41 100644 > --- a/src/intel/compiler/brw_eu_emit.c > +++ b/src/intel/compiler/brw_eu_emit.c > @@ -3285,7 +3285,8 @@ brw_set_memory_fence_message(struct brw_codegen *p, > > void > brw_memory_fence(struct brw_codegen *p, > - struct brw_reg dst) > + struct brw_reg dst, > + uint32_t send_op) > { > const struct gen_device_info *devinfo = p->devinfo; > const bool commit_enable = > @@ -3301,7 +3302,7 @@ brw_memory_fence(struct brw_codegen *p, > /* Set dst as destination for dependency tracking, the MEMORY_FENCE > * message doesn't write anything back. 
> */ > - insn = next_insn(p, BRW_OPCODE_SEND); > + insn = next_insn(p, send_op); > dst = retype(dst, BRW_REGISTER_TYPE_UW); > brw_set_dest(p, insn, dst); > brw_set_src0(p, insn, dst); > @@ -3313,7 +3314,7 @@ brw_memory_fence(struct brw_codegen *p, > * flush it too. Use a different register so both flushes can be > * pipelined by the hardware. > */ > - insn = next_insn(p, BRW_OPCODE_SEND); > + insn = next_insn(p, send_op); >brw_set_dest(p, insn, offset(dst, 1)); >brw_set_src0(p, insn, offset(dst, 1)); >brw_set_memory_fence_message(p, insn, GEN6_SFID_DATAPORT_RENDER_CACHE, > diff --git a/src/intel/compiler/brw_fs_generator.cpp > b/src/intel/compiler/brw_fs_generator.cpp > index 0c85eb8e1e..f099d092d1 100644 > --- a/src/intel/compiler/brw_fs_generator.cpp > +++ b/src/intel/compiler/brw_fs_generator.cpp > @@ -2277,7 +2277,12 @@ fs_generator
Re: [Mesa-dev] [PATCH 2/2] i965: Add ARB_fragment_shader_interlock support.
Hi Francisco, Thank you for reviewing! On Wed, Apr 19, 2017 at 4:18 PM, Francisco Jerez wrote: > Hi Pam, looks good overall, a couple of comments below, > > Plamena Manolova writes: > > > Adds suppport for ARB_fragment_shader_interlock. We achieve > > the interlock and fragment ordering by issuing a memory fence > > via sendc. > > > > Signed-off-by: Plamena Manolova > > --- > > docs/features.txt| 2 +- > > docs/relnotes/17.1.0.html| 1 + > > src/intel/compiler/brw_eu.h | 4 +++ > > src/intel/compiler/brw_eu_defines.h | 2 ++ > > src/intel/compiler/brw_eu_emit.c | 47 > > > src/intel/compiler/brw_fs_generator.cpp | 4 +++ > > src/intel/compiler/brw_fs_nir.cpp| 15 + > > src/intel/compiler/brw_shader.cpp| 4 +++ > > src/mesa/drivers/dri/i965/intel_extensions.c | 5 +++ > > 9 files changed, 83 insertions(+), 1 deletion(-) > > > > diff --git a/docs/features.txt b/docs/features.txt > > index 5f63632..a6237c0 100644 > > --- a/docs/features.txt > > +++ b/docs/features.txt > > @@ -281,7 +281,7 @@ Khronos, ARB, and OES extensions that are not part > of any OpenGL or OpenGL ES ve > >GL_ARB_cl_event not started > >GL_ARB_compute_variable_group_sizeDONE (nvc0, > radeonsi) > >GL_ARB_ES3_2_compatibilityDONE > (i965/gen8+) > > - GL_ARB_fragment_shader_interlock not started > > + GL_ARB_fragment_shader_interlock DONE (i965) > >GL_ARB_gl_spirv not started > >GL_ARB_gpu_shader_int64 DONE > (i965/gen8+, nvc0, radeonsi, softpipe, llvmpipe) > >GL_ARB_indirect_parametersDONE (nvc0, > radeonsi) > > diff --git a/docs/relnotes/17.1.0.html b/docs/relnotes/17.1.0.html > > index e7cfe38..1b2393f 100644 > > --- a/docs/relnotes/17.1.0.html > > +++ b/docs/relnotes/17.1.0.html > > @@ -45,6 +45,7 @@ Note: some of the new features are only available with > certain drivers. 
> > > > > > OpenGL 4.2 on i965/ivb > > +GL_ARB_fragment_shader_interlock on i965 > > GL_ARB_gpu_shader_fp64 on i965/ivybridge > > GL_ARB_gpu_shader_int64 on i965/gen8+, nvc0, radeonsi, softpipe, > llvmpipe > > GL_ARB_shader_ballot on nvc0, radeonsi > > diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h > > index f422595..117cfae 100644 > > --- a/src/intel/compiler/brw_eu.h > > +++ b/src/intel/compiler/brw_eu.h > > @@ -480,6 +480,10 @@ brw_memory_fence(struct brw_codegen *p, > > struct brw_reg dst); > > > > void > > +brw_interlock(struct brw_codegen *p, > > + struct brw_reg dst); > > + > > +void > > brw_pixel_interpolator_query(struct brw_codegen *p, > > struct brw_reg dest, > > struct brw_reg mrf, > > diff --git a/src/intel/compiler/brw_eu_defines.h > b/src/intel/compiler/brw_eu_defines.h > > index 13a70f6..9eb5210 100644 > > --- a/src/intel/compiler/brw_eu_defines.h > > +++ b/src/intel/compiler/brw_eu_defines.h > > @@ -444,6 +444,8 @@ enum opcode { > > */ > > SHADER_OPCODE_BROADCAST, > > > > + SHADER_OPCODE_INTERLOCK, > > + > > VEC4_OPCODE_MOV_BYTES, > > VEC4_OPCODE_PACK_BYTES, > > VEC4_OPCODE_UNPACK_UNIFORM, > > diff --git a/src/intel/compiler/brw_eu_emit.c > b/src/intel/compiler/brw_eu_emit.c > > index 231d6fd..52adf22 100644 > > --- a/src/intel/compiler/brw_eu_emit.c > > +++ b/src/intel/compiler/brw_eu_emit.c > > @@ -3403,6 +3403,53 @@ brw_memory_fence(struct brw_codegen *p, > > } > > > > void > > +brw_interlock(struct brw_codegen *p, > > + struct brw_reg dst) > > +{ > > + const struct gen_device_info *devinfo = p->devinfo; > > + const bool commit_enable = devinfo->gen == 7 && !devinfo->is_haswell; > > + struct brw_inst *insn; > > + > > + brw_push_insn_state(p); > > + brw_set_default_mask_control(p, BRW_MASK_DISABLE); > > + brw_set_default_exec_size(p, BRW_EXECUTE_1); > > + dst = vec1(dst); > > + > > + /* Set dst as destination for dependency tracking, the MEMORY_FENCE > > +* message doesn't write anything back. 
> > +*/ > > + /* BRW_OPCODE_SENDC is what the interlock actually depends on */ > > + insn = next_insn(p, BRW_OPCODE_SENDC); > > + dst = retype(dst, BRW_REGISTER_TYPE_UW); > > + brw_set_dest(p, insn, dst); > > + brw_set_src0(p, insn, dst); > > + /* Issuing a memory fence ensures the ordering of fragments */ > > + brw_set_memory_fence_message(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE, > > +commit_enable); > > + > > + if (devinfo->gen == 7 && !devinfo->is_haswell) { > > + /* IVB does typed surface access through the render cache, so we > need to > > + * flush it too. Use a different register so both flushes can be > > +
Re: [Mesa-dev] [PATCH 2/2] i965: Add ARB_fragment_shader_interlock support.
Hi Pam, looks good overall, a couple of comments below, Plamena Manolova writes: > Adds suppport for ARB_fragment_shader_interlock. We achieve > the interlock and fragment ordering by issuing a memory fence > via sendc. > > Signed-off-by: Plamena Manolova > --- > docs/features.txt| 2 +- > docs/relnotes/17.1.0.html| 1 + > src/intel/compiler/brw_eu.h | 4 +++ > src/intel/compiler/brw_eu_defines.h | 2 ++ > src/intel/compiler/brw_eu_emit.c | 47 > > src/intel/compiler/brw_fs_generator.cpp | 4 +++ > src/intel/compiler/brw_fs_nir.cpp| 15 + > src/intel/compiler/brw_shader.cpp| 4 +++ > src/mesa/drivers/dri/i965/intel_extensions.c | 5 +++ > 9 files changed, 83 insertions(+), 1 deletion(-) > > diff --git a/docs/features.txt b/docs/features.txt > index 5f63632..a6237c0 100644 > --- a/docs/features.txt > +++ b/docs/features.txt > @@ -281,7 +281,7 @@ Khronos, ARB, and OES extensions that are not part of any > OpenGL or OpenGL ES ve >GL_ARB_cl_event not started >GL_ARB_compute_variable_group_sizeDONE (nvc0, radeonsi) >GL_ARB_ES3_2_compatibilityDONE (i965/gen8+) > - GL_ARB_fragment_shader_interlock not started > + GL_ARB_fragment_shader_interlock DONE (i965) >GL_ARB_gl_spirv not started >GL_ARB_gpu_shader_int64 DONE (i965/gen8+, > nvc0, radeonsi, softpipe, llvmpipe) >GL_ARB_indirect_parametersDONE (nvc0, radeonsi) > diff --git a/docs/relnotes/17.1.0.html b/docs/relnotes/17.1.0.html > index e7cfe38..1b2393f 100644 > --- a/docs/relnotes/17.1.0.html > +++ b/docs/relnotes/17.1.0.html > @@ -45,6 +45,7 @@ Note: some of the new features are only available with > certain drivers. 
> > > OpenGL 4.2 on i965/ivb > +GL_ARB_fragment_shader_interlock on i965 > GL_ARB_gpu_shader_fp64 on i965/ivybridge > GL_ARB_gpu_shader_int64 on i965/gen8+, nvc0, radeonsi, softpipe, > llvmpipe > GL_ARB_shader_ballot on nvc0, radeonsi > diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h > index f422595..117cfae 100644 > --- a/src/intel/compiler/brw_eu.h > +++ b/src/intel/compiler/brw_eu.h > @@ -480,6 +480,10 @@ brw_memory_fence(struct brw_codegen *p, > struct brw_reg dst); > > void > +brw_interlock(struct brw_codegen *p, > + struct brw_reg dst); > + > +void > brw_pixel_interpolator_query(struct brw_codegen *p, > struct brw_reg dest, > struct brw_reg mrf, > diff --git a/src/intel/compiler/brw_eu_defines.h > b/src/intel/compiler/brw_eu_defines.h > index 13a70f6..9eb5210 100644 > --- a/src/intel/compiler/brw_eu_defines.h > +++ b/src/intel/compiler/brw_eu_defines.h > @@ -444,6 +444,8 @@ enum opcode { > */ > SHADER_OPCODE_BROADCAST, > > + SHADER_OPCODE_INTERLOCK, > + > VEC4_OPCODE_MOV_BYTES, > VEC4_OPCODE_PACK_BYTES, > VEC4_OPCODE_UNPACK_UNIFORM, > diff --git a/src/intel/compiler/brw_eu_emit.c > b/src/intel/compiler/brw_eu_emit.c > index 231d6fd..52adf22 100644 > --- a/src/intel/compiler/brw_eu_emit.c > +++ b/src/intel/compiler/brw_eu_emit.c > @@ -3403,6 +3403,53 @@ brw_memory_fence(struct brw_codegen *p, > } > > void > +brw_interlock(struct brw_codegen *p, > + struct brw_reg dst) > +{ > + const struct gen_device_info *devinfo = p->devinfo; > + const bool commit_enable = devinfo->gen == 7 && !devinfo->is_haswell; > + struct brw_inst *insn; > + > + brw_push_insn_state(p); > + brw_set_default_mask_control(p, BRW_MASK_DISABLE); > + brw_set_default_exec_size(p, BRW_EXECUTE_1); > + dst = vec1(dst); > + > + /* Set dst as destination for dependency tracking, the MEMORY_FENCE > +* message doesn't write anything back. 
> +*/ > + /* BRW_OPCODE_SENDC is what the interlock actually depends on */ > + insn = next_insn(p, BRW_OPCODE_SENDC); > + dst = retype(dst, BRW_REGISTER_TYPE_UW); > + brw_set_dest(p, insn, dst); > + brw_set_src0(p, insn, dst); > + /* Issuing a memory fence ensures the ordering of fragments */ > + brw_set_memory_fence_message(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE, > +commit_enable); > + > + if (devinfo->gen == 7 && !devinfo->is_haswell) { > + /* IVB does typed surface access through the render cache, so we need > to > + * flush it too. Use a different register so both flushes can be > + * pipelined by the hardware. > + */ > + insn = next_insn(p, BRW_OPCODE_SENDC); > + brw_set_dest(p, insn, offset(dst, 1)); > + brw_set_src0(p, insn, offset(dst, 1)); > + brw_set_memory_fence_message(p, insn, GEN6_SFID_DATAPORT_RENDER_CACHE, > +
Re: [Mesa-dev] [PATCH 2/2] i965: Add ARB_fragment_shader_interlock support.
2017-04-18 9:25 GMT+08:00 Plamena Manolova : > Adds suppport for ARB_fragment_shader_interlock. We achieve > the interlock and fragment ordering by issuing a memory fence > via sendc. > > Signed-off-by: Plamena Manolova > --- > docs/features.txt| 2 +- > docs/relnotes/17.1.0.html| 1 + > src/intel/compiler/brw_eu.h | 4 +++ > src/intel/compiler/brw_eu_defines.h | 2 ++ > src/intel/compiler/brw_eu_emit.c | 47 > > src/intel/compiler/brw_fs_generator.cpp | 4 +++ > src/intel/compiler/brw_fs_nir.cpp| 15 + > src/intel/compiler/brw_shader.cpp| 4 +++ > src/mesa/drivers/dri/i965/intel_extensions.c | 5 +++ > 9 files changed, 83 insertions(+), 1 deletion(-) > > diff --git a/docs/features.txt b/docs/features.txt > index 5f63632..a6237c0 100644 > --- a/docs/features.txt > +++ b/docs/features.txt > @@ -281,7 +281,7 @@ Khronos, ARB, and OES extensions that are not part of any > OpenGL or OpenGL ES ve >GL_ARB_cl_event not started >GL_ARB_compute_variable_group_sizeDONE (nvc0, radeonsi) >GL_ARB_ES3_2_compatibilityDONE (i965/gen8+) > - GL_ARB_fragment_shader_interlock not started > + GL_ARB_fragment_shader_interlock DONE (i965) >GL_ARB_gl_spirv not started >GL_ARB_gpu_shader_int64 DONE (i965/gen8+, > nvc0, radeonsi, softpipe, llvmpipe) >GL_ARB_indirect_parametersDONE (nvc0, radeonsi) > diff --git a/docs/relnotes/17.1.0.html b/docs/relnotes/17.1.0.html > index e7cfe38..1b2393f 100644 > --- a/docs/relnotes/17.1.0.html > +++ b/docs/relnotes/17.1.0.html > @@ -45,6 +45,7 @@ Note: some of the new features are only available with > certain drivers. > > > OpenGL 4.2 on i965/ivb > +GL_ARB_fragment_shader_interlock on i965 This should go into 17.2.0 instead. 
Cheers, Boyan Ding > GL_ARB_gpu_shader_fp64 on i965/ivybridge > GL_ARB_gpu_shader_int64 on i965/gen8+, nvc0, radeonsi, softpipe, > llvmpipe > GL_ARB_shader_ballot on nvc0, radeonsi > diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h > index f422595..117cfae 100644 > --- a/src/intel/compiler/brw_eu.h > +++ b/src/intel/compiler/brw_eu.h > @@ -480,6 +480,10 @@ brw_memory_fence(struct brw_codegen *p, > struct brw_reg dst); > > void > +brw_interlock(struct brw_codegen *p, > + struct brw_reg dst); > + > +void > brw_pixel_interpolator_query(struct brw_codegen *p, > struct brw_reg dest, > struct brw_reg mrf, > diff --git a/src/intel/compiler/brw_eu_defines.h > b/src/intel/compiler/brw_eu_defines.h > index 13a70f6..9eb5210 100644 > --- a/src/intel/compiler/brw_eu_defines.h > +++ b/src/intel/compiler/brw_eu_defines.h > @@ -444,6 +444,8 @@ enum opcode { > */ > SHADER_OPCODE_BROADCAST, > > + SHADER_OPCODE_INTERLOCK, > + > VEC4_OPCODE_MOV_BYTES, > VEC4_OPCODE_PACK_BYTES, > VEC4_OPCODE_UNPACK_UNIFORM, > diff --git a/src/intel/compiler/brw_eu_emit.c > b/src/intel/compiler/brw_eu_emit.c > index 231d6fd..52adf22 100644 > --- a/src/intel/compiler/brw_eu_emit.c > +++ b/src/intel/compiler/brw_eu_emit.c > @@ -3403,6 +3403,53 @@ brw_memory_fence(struct brw_codegen *p, > } > > void > +brw_interlock(struct brw_codegen *p, > + struct brw_reg dst) > +{ > + const struct gen_device_info *devinfo = p->devinfo; > + const bool commit_enable = devinfo->gen == 7 && !devinfo->is_haswell; > + struct brw_inst *insn; > + > + brw_push_insn_state(p); > + brw_set_default_mask_control(p, BRW_MASK_DISABLE); > + brw_set_default_exec_size(p, BRW_EXECUTE_1); > + dst = vec1(dst); > + > + /* Set dst as destination for dependency tracking, the MEMORY_FENCE > +* message doesn't write anything back. 
> +*/ > + /* BRW_OPCODE_SENDC is what the interlock actually depends on */ > + insn = next_insn(p, BRW_OPCODE_SENDC); > + dst = retype(dst, BRW_REGISTER_TYPE_UW); > + brw_set_dest(p, insn, dst); > + brw_set_src0(p, insn, dst); > + /* Issuing a memory fence ensures the ordering of fragments */ > + brw_set_memory_fence_message(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE, > +commit_enable); > + > + if (devinfo->gen == 7 && !devinfo->is_haswell) { > + /* IVB does typed surface access through the render cache, so we need > to > + * flush it too. Use a different register so both flushes can be > + * pipelined by the hardware. > + */ > + insn = next_insn(p, BRW_OPCODE_SENDC); > + brw_set_dest(p, insn, offset(dst, 1)); > + brw_set_src0(p, insn, offset(dst, 1)); > + brw_set_memory_fence_message(p, insn, GEN6_SFID_DATAPORT_RENDER_CACHE, > +
[Mesa-dev] [PATCH 2/2] i965: Add ARB_fragment_shader_interlock support.
Adds support for ARB_fragment_shader_interlock. We achieve the interlock and fragment ordering by issuing a memory fence via sendc. Signed-off-by: Plamena Manolova --- docs/features.txt| 2 +- docs/relnotes/17.1.0.html| 1 + src/intel/compiler/brw_eu.h | 4 +++ src/intel/compiler/brw_eu_defines.h | 2 ++ src/intel/compiler/brw_eu_emit.c | 47 src/intel/compiler/brw_fs_generator.cpp | 4 +++ src/intel/compiler/brw_fs_nir.cpp| 15 + src/intel/compiler/brw_shader.cpp| 4 +++ src/mesa/drivers/dri/i965/intel_extensions.c | 5 +++ 9 files changed, 83 insertions(+), 1 deletion(-) diff --git a/docs/features.txt b/docs/features.txt index 5f63632..a6237c0 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -281,7 +281,7 @@ Khronos, ARB, and OES extensions that are not part of any OpenGL or OpenGL ES ve GL_ARB_cl_event not started GL_ARB_compute_variable_group_sizeDONE (nvc0, radeonsi) GL_ARB_ES3_2_compatibilityDONE (i965/gen8+) - GL_ARB_fragment_shader_interlock not started + GL_ARB_fragment_shader_interlock DONE (i965) GL_ARB_gl_spirv not started GL_ARB_gpu_shader_int64 DONE (i965/gen8+, nvc0, radeonsi, softpipe, llvmpipe) GL_ARB_indirect_parametersDONE (nvc0, radeonsi) diff --git a/docs/relnotes/17.1.0.html b/docs/relnotes/17.1.0.html index e7cfe38..1b2393f 100644 --- a/docs/relnotes/17.1.0.html +++ b/docs/relnotes/17.1.0.html @@ -45,6 +45,7 @@ Note: some of the new features are only available with certain drivers. 
OpenGL 4.2 on i965/ivb +GL_ARB_fragment_shader_interlock on i965 GL_ARB_gpu_shader_fp64 on i965/ivybridge GL_ARB_gpu_shader_int64 on i965/gen8+, nvc0, radeonsi, softpipe, llvmpipe GL_ARB_shader_ballot on nvc0, radeonsi diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h index f422595..117cfae 100644 --- a/src/intel/compiler/brw_eu.h +++ b/src/intel/compiler/brw_eu.h @@ -480,6 +480,10 @@ brw_memory_fence(struct brw_codegen *p, struct brw_reg dst); void +brw_interlock(struct brw_codegen *p, + struct brw_reg dst); + +void brw_pixel_interpolator_query(struct brw_codegen *p, struct brw_reg dest, struct brw_reg mrf, diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index 13a70f6..9eb5210 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -444,6 +444,8 @@ enum opcode { */ SHADER_OPCODE_BROADCAST, + SHADER_OPCODE_INTERLOCK, + VEC4_OPCODE_MOV_BYTES, VEC4_OPCODE_PACK_BYTES, VEC4_OPCODE_UNPACK_UNIFORM, diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index 231d6fd..52adf22 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -3403,6 +3403,53 @@ brw_memory_fence(struct brw_codegen *p, } void +brw_interlock(struct brw_codegen *p, + struct brw_reg dst) +{ + const struct gen_device_info *devinfo = p->devinfo; + const bool commit_enable = devinfo->gen == 7 && !devinfo->is_haswell; + struct brw_inst *insn; + + brw_push_insn_state(p); + brw_set_default_mask_control(p, BRW_MASK_DISABLE); + brw_set_default_exec_size(p, BRW_EXECUTE_1); + dst = vec1(dst); + + /* Set dst as destination for dependency tracking, the MEMORY_FENCE +* message doesn't write anything back. 
+*/ + /* BRW_OPCODE_SENDC is what the interlock actually depends on */ + insn = next_insn(p, BRW_OPCODE_SENDC); + dst = retype(dst, BRW_REGISTER_TYPE_UW); + brw_set_dest(p, insn, dst); + brw_set_src0(p, insn, dst); + /* Issuing a memory fence ensures the ordering of fragments */ + brw_set_memory_fence_message(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE, +commit_enable); + + if (devinfo->gen == 7 && !devinfo->is_haswell) { + /* IVB does typed surface access through the render cache, so we need to + * flush it too. Use a different register so both flushes can be + * pipelined by the hardware. + */ + insn = next_insn(p, BRW_OPCODE_SENDC); + brw_set_dest(p, insn, offset(dst, 1)); + brw_set_src0(p, insn, offset(dst, 1)); + brw_set_memory_fence_message(p, insn, GEN6_SFID_DATAPORT_RENDER_CACHE, + commit_enable); + + /* Now write the response of the second message into the response of the + * first to trigger a pipeline stall -- This way future render and data + * cache messages will be properly ordered with respect to past data and + * render cache messages. + */ + brw_MOV(p, dst