On Wed, Apr 11, 2018 at 12:20 AM, Iago Toral Quiroga <ito...@igalia.com> wrote:
> The lowering pass was specialized to act on 64-bit to 32-bit conversions > only, > but the implementation is valid for other cases. > --- > src/intel/compiler/brw_fs_lower_conversions.cpp | 5 ++++- > src/intel/compiler/brw_fs_nir.cpp | 14 +++----------- > 2 files changed, 7 insertions(+), 12 deletions(-) > > diff --git a/src/intel/compiler/brw_fs_lower_conversions.cpp > b/src/intel/compiler/brw_fs_lower_conversions.cpp > index 663c9674c49..f95b39d3e86 100644 > --- a/src/intel/compiler/brw_fs_lower_conversions.cpp > +++ b/src/intel/compiler/brw_fs_lower_conversions.cpp > @@ -54,7 +54,7 @@ fs_visitor::lower_conversions() > bool saturate = inst->saturate; > > if (supports_type_conversion(inst)) { > - if (get_exec_type_size(inst) == 8 && type_sz(inst->dst.type) < > 8) { > + if (type_sz(inst->dst.type) < get_exec_type_size(inst)) { > /* From the Broadwell PRM, 3D Media GPGPU, "Double Precision > Float to > * Single Precision Float": > * > @@ -64,6 +64,9 @@ fs_visitor::lower_conversions() > * So we need to allocate a temporary that's two registers, > and then do > * a strided MOV to get the lower DWord of every Qword that > has the > * result. > + * > + * This restriction applies, in general, whenever we convert > to > + * a type with a smaller bit-size. 
> */ > fs_reg temp = ibld.vgrf(get_exec_type(inst)); > fs_reg strided_temp = subscript(temp, dst.type, 0); > diff --git a/src/intel/compiler/brw_fs_nir.cpp > b/src/intel/compiler/brw_fs_nir.cpp > index f40a3540e31..5e0dd37eefd 100644 > --- a/src/intel/compiler/brw_fs_nir.cpp > +++ b/src/intel/compiler/brw_fs_nir.cpp > @@ -753,19 +753,9 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, > nir_alu_instr *instr) > */ > > case nir_op_f2f16_undef: > - case nir_op_i2i16: > - case nir_op_u2u16: { > - /* TODO: Fixing aligment rules for conversions from 32-bits to > - * 16-bit types should be moved to lower_conversions > - */ > - fs_reg tmp = bld.vgrf(op[0].type, 1); > - tmp = subscript(tmp, result.type, 0); > - inst = bld.MOV(tmp, op[0]); > - inst->saturate = instr->dest.saturate; > - inst = bld.MOV(result, tmp); > + inst = bld.MOV(result, op[0]); > inst->saturate = instr->dest.saturate; > break; > It appears to me that we can move f2f16_undef to the block below as well. With or without that, Reviewed-by: Jason Ekstrand <ja...@jlekstrand.net> > - } > > case nir_op_f2f64: > case nir_op_f2i64: > @@ -803,6 +793,8 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, > nir_alu_instr *instr) > case nir_op_f2u32: > case nir_op_i2i32: > case nir_op_u2u32: > + case nir_op_i2i16: > + case nir_op_u2u16: > inst = bld.MOV(result, op[0]); > inst->saturate = instr->dest.saturate; > break; > -- > 2.14.1 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev