Reviewed-by: Iago Toral Quiroga <ito...@igalia.com> On Tue, 2015-12-01 at 14:35 -0800, Jordan Justen wrote: > v4: > * Apply similar optimization for shared variable stores as > 0cb7d7b4b7c32246d4c4225a1d17d7ff79a7526d. This was causing an > OpenGLES 3.1 CTS failure, but > 867c436ca841b4196b4dde4786f5086c76b20dd7 fixes that. > > Signed-off-by: Jordan Justen <jordan.l.jus...@intel.com> > Cc: Iago Toral Quiroga <ito...@igalia.com> > --- > src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 48 > ++++++++++++++++++++++++++++++++ > 1 file changed, 48 insertions(+) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > index 12a8b59..d945cef 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > @@ -2472,6 +2472,54 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, > nir_intrinsic_instr *instr > break; > } > > + case nir_intrinsic_store_shared_indirect: > + has_indirect = true; > + /* fallthrough */ > + case nir_intrinsic_store_shared: { > + assert(devinfo->gen >= 7); > + > + /* Block index */ > + fs_reg surf_index = brw_imm_ud(GEN7_BTI_SLM); > + > + /* Value */ > + fs_reg val_reg = get_nir_src(instr->src[0]); > + > + /* Writemask */ > + unsigned writemask = instr->const_index[1]; > + > + /* Combine groups of consecutive enabled channels in one write > + * message. We use ffs to find the first enabled channel and then ffs > on > + * the bit-inverse, down-shifted writemask to determine the length of > + * the block of enabled bits. 
> + */ > + while (writemask) { > + unsigned first_component = ffs(writemask) - 1; > + unsigned length = ffs(~(writemask >> first_component)) - 1; > + fs_reg offset_reg; > + > + if (!has_indirect) { > + offset_reg = brw_imm_ud(instr->const_index[0] + 4 * > first_component); > + } else { > + offset_reg = vgrf(glsl_type::uint_type); > + bld.ADD(offset_reg, > + retype(get_nir_src(instr->src[1]), BRW_REGISTER_TYPE_UD), > + brw_imm_ud(4 * first_component)); > + } > + > + emit_untyped_write(bld, surf_index, offset_reg, > + offset(val_reg, bld, first_component), > + 1 /* dims */, length, > + BRW_PREDICATE_NONE); > + > + /* Clear the bits in the writemask that we just wrote, then try > + * again to see if more channels are left. > + */ > + writemask &= (15 << (first_component + length)); > + } > + > + break; > + } > + > case nir_intrinsic_load_input_indirect: > has_indirect = true; > /* fallthrough */
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev