Iago Toral Quiroga <ito...@igalia.com> writes: > The previous implementation relied on the std140 alignment rules to > avoid handling misalignment in the case where we are loading more than > 2 double components from a vector, which requires to emit a second load > message. > > This alternative implementation deals with misalignment and is more > flexible going forward. All credit for this goes to Curro, since he > not only suggested this but also wrote the implementation in the > mailing list. > --- > > Curro, maybe I should make you the author and me the reviewer then? :) > It's up to you, feel free to add either my:
Signed-off-by: Francisco Jerez <curroje...@riseup.net> along with your R-b, or: Reviewed-by: Francisco Jerez <curroje...@riseup.net> > src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 59 > +++++++------------------------- > 1 file changed, 13 insertions(+), 46 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > index ebcc92a..35a6aed 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > @@ -3583,9 +3583,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, > nir_intrinsic_instr *instr > nir->info.num_ubos - 1); > } > > - /* Number of 32-bit slots in the type */ > - unsigned type_slots = MAX2(1, type_sz(dest.type) / 4); > - > nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); > if (const_offset == NULL) { > fs_reg base_offset = retype(get_nir_src(instr->src[1]), > @@ -3603,55 +3600,25 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, > nir_intrinsic_instr *instr > * we let CSE deal with duplicate loads. Here we see a vector access > * and we have to split it if necessary. > */ > - fs_reg packed_consts = vgrf(glsl_type::float_type); > - packed_consts.type = dest.type; > + const unsigned type_size = type_sz(dest.type); > + const fs_reg packed_consts = bld.vgrf(BRW_REGISTER_TYPE_F); > + for (unsigned c = 0; c < instr->num_components;) { > + const unsigned base = const_offset->u32[0] + c * type_size; > > - unsigned const_offset_aligned = const_offset->u32[0] & ~15; > + /* Number of usable components in the next 16B-aligned load */ > + const unsigned count = MIN2(instr->num_components - c, > + (16 - base % 16) / type_size); > > - /* A vec4 only contains half of a dvec4, if we need more than 2 > - * components of a dvec4 we will have to issue another load for > - * components z and w. > - */ > - int num_components; > - if (type_slots == 1) > - num_components = instr->num_components; > - else > - num_components = MIN2(2, instr->num_components); > - > - /* The computation of num_components doesn't take into account > - * misalignment, which should be okay according to std140 vector > - * alignment rules. > - */ > - assert(const_offset->u32[0] % 16 + > - type_sz(dest.type) * num_components <= 16); > - > - int remaining_components = instr->num_components; > - while (remaining_components > 0) { > - /* Read the vec4 from a 16-byte aligned offset */ > - struct brw_reg const_offset_reg = > brw_imm_ud(const_offset_aligned); > bld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, > - retype(packed_consts, BRW_REGISTER_TYPE_F), > - surf_index, const_offset_reg); > - > - const fs_reg consts = byte_offset(packed_consts, > (const_offset->u32[0] % 16)); > - unsigned dest_offset = instr->num_components - > remaining_components; > + packed_consts, surf_index, brw_imm_ud(base & ~15)); > > - /* XXX: This doesn't update the sub-16B offset across iterations > of > - * the loop, which should work for std140 vector alignment rules. > - */ > - assert(dest_offset == 0 || const_offset->u32[0] % 16 == 0); > + const fs_reg consts = > + retype(byte_offset(packed_consts, base & 15), dest.type); > > - for (int i = 0; i < num_components; i++) > - bld.MOV(offset(dest, bld, i + dest_offset), component(consts, > i)); > + for (unsigned d = 0; d < count; d++) > + bld.MOV(offset(dest, bld, c + d), component(consts, d)); > > - /* If this is a large enough 64-bit load, we will need to emit > - * another message > - */ > - remaining_components -= num_components; > - assert(remaining_components == 0 || > - (remaining_components <= 2 && type_slots == 2)); > - num_components = remaining_components; > - const_offset_aligned += 16; > + c += count; > } > } > break; > -- > 1.9.1
signature.asc
Description: PGP signature
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev