On Thu, 2016-05-12 at 13:35 +0200, Samuel Iglesias Gonsálvez wrote: > From: Iago Toral Quiroga <ito...@igalia.com> > > This is pretty much the same we do with SSBOs. > --- > src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 37 > +++++++++++++++++++++++++++----- > 1 file changed, 32 insertions(+), 5 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > index f6837e4..419f940 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > @@ -3058,6 +3058,26 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder > &bld, > /* Writemask */ > unsigned writemask = instr->const_index[1]; > > + /* get_nir_src() retypes to integer. Be wary of 64-bit types though > + * since the untyped writes below operate in units of 32-bits, which > + * means that we need to write twice as many components each time. > + * Also, we have to shuffle 64-bit data to be in the appropriate layout > + * expected by our 32-bit write messages. > + */ > + unsigned type_size = 4; > + unsigned bit_size = instr->src[0].is_ssa ? > + instr->src[0].ssa->bit_size : instr->src[0].reg.reg->bit_size; > + if (bit_size == 64) { > + type_size = 8; > + shuffle_64bit_data_for_32bit_write( > + bld, > + retype(val_reg, BRW_REGISTER_TYPE_F), > + retype(val_reg, BRW_REGISTER_TYPE_DF),
I have just noticed that this is not correct: we can't do the shuffling in val_reg directly. We need to use a temporary, like we do for SSBO stores. I'll fix that. > + instr->num_components); > + } > + > + unsigned type_slots = type_size / 4; > + > /* Combine groups of consecutive enabled channels in one write > * message. We use ffs to find the first enabled channel and then ffs > on > * the bit-inverse, down-shifted writemask to determine the length of > @@ -3066,22 +3086,29 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder > &bld, > while (writemask) { > unsigned first_component = ffs(writemask) - 1; > unsigned length = ffs(~(writemask >> first_component)) - 1; > - fs_reg offset_reg; > > + /* We can't write more than 2 64-bit components at once. Limit the > + * length of the write to what we can do and let the next iteration > + * handle the rest > + */ > + if (type_size > 4) > + length = MIN2(2, length); > + > + fs_reg offset_reg; > nir_const_value *const_offset = > nir_src_as_const_value(instr->src[1]); > if (const_offset) { > offset_reg = brw_imm_ud(instr->const_index[0] + > const_offset->u32[0] + > - 4 * first_component); > + type_size * first_component); > } else { > offset_reg = vgrf(glsl_type::uint_type); > bld.ADD(offset_reg, > retype(get_nir_src(instr->src[1]), BRW_REGISTER_TYPE_UD), > - brw_imm_ud(instr->const_index[0] + 4 * first_component)); > + brw_imm_ud(instr->const_index[0] + type_size * > first_component)); > } > > emit_untyped_write(bld, surf_index, offset_reg, > - offset(val_reg, bld, first_component), > - 1 /* dims */, length, > + offset(val_reg, bld, first_component * > type_slots), > + 1 /* dims */, length * type_slots, > BRW_PREDICATE_NONE); > > /* Clear the bits in the writemask that we just wrote, then try _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev