On Fri, Mar 24, 2017 at 01:42:23PM -0700, Francisco Jerez wrote: > Samuel Iglesias Gonsálvez <sigles...@igalia.com> writes: > > > Generalize it to lower any unsupported narrower conversion. > > > > v2 (Curro): > > - Add supports_type_conversion() > > - Reuse existing intruction instead of cloning it. > > - Generalize d2x to narrower and equal size conversions. > > > > v3 (Curro): > > - Make supports_type_conversion() const and improve it. > > - Use foreach_block_and_inst to process added instructions. > > - Simplify code. > > - Add assert and improve comments. > > - Remove redundant mov. > > - Remove useless comment. > > - Remove saturate == false assert and add support for saturation > > when fixing the conversion. > > - Add get_exec_type() function. > > > > Signed-off-by: Samuel Iglesias Gonsálvez <sigles...@igalia.com> > > --- > > > > This patch replaces the respective v3 one. > > > > src/intel/compiler/brw_fs.cpp | 11 +-- > > src/intel/compiler/brw_fs_lower_d2x.cpp | 117 > > +++++++++++++++++++++++--------- > > 2 files changed, 91 insertions(+), 37 deletions(-) > > > > diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp > > index 086b1a04855..8eb8789905c 100644 > > --- a/src/intel/compiler/brw_fs.cpp > > +++ b/src/intel/compiler/brw_fs.cpp > > @@ -5694,11 +5694,6 @@ fs_visitor::optimize() > > OPT(dead_code_eliminate); > > } > > > > - if (OPT(lower_d2x)) { > > - OPT(opt_copy_propagation); > > - OPT(dead_code_eliminate); > > - } > > - > > OPT(lower_simd_width); > > > > /* After SIMD lowering just in case we had to unroll the EOT send. */ > > @@ -5745,6 +5740,12 @@ fs_visitor::optimize() > > OPT(dead_code_eliminate); > > } > > > > + if (OPT(lower_d2x)) { > > + OPT(opt_copy_propagation); > > + OPT(dead_code_eliminate); > > + OPT(lower_simd_width); > > + } > > + > > lower_uniform_pull_constant_loads(); > > > > validate(); > > diff --git a/src/intel/compiler/brw_fs_lower_d2x.cpp > > b/src/intel/compiler/brw_fs_lower_d2x.cpp > > index a2db1154615..9bc78fa969b 100644 > > --- a/src/intel/compiler/brw_fs_lower_d2x.cpp > > +++ b/src/intel/compiler/brw_fs_lower_d2x.cpp > > @@ -27,48 +27,101 @@ > > > > using namespace brw; > > > > +static unsigned > > The return type is not really an integer. > > > +get_exec_type(const fs_inst *inst) > > +{ > > + unsigned exec_type = 0; > > This isn't either. You could initialize this to BRW_REGISTER_TYPE_B > which isn't a valid execution type so you can drop the whole > has_valid_source tracking.
OK. > > > + bool has_valid_source = false; > > + > > + for (int i = 0; i < inst->sources; i++) { > > + if (inst->src[i].type != BAD_FILE) { > > + if (!has_valid_source) { > > + exec_type = inst->src[i].type; > > + has_valid_source = true; > > + } else { > > + if (type_sz(inst->src[i].type) > type_sz(exec_type)) > > + exec_type = inst->src[i].type; > > Note that this doesn't handle vector immediates correctly (which give > you either an F or W execution type), byte nor unsigned source types. I > suggest you add a get_exec_type(brw_reg_type) overload you'd call here > to translate a source type into the matching exec type. OK > > To handle cases where you have mixed floating point and integer types it > would probably be sensible to do something along the lines of: > > | const brw_reg_type t = get_exec_type(inst->src[i].type); > | // ... > | if (type_sz(t) == type_sz(exec_type) && is_floating_point(t)) > | exec_type = t; OK, I will add this. > > > + } > > + } > > + } > > + > > + /* FIXME: if this assert fails, then the instruction has no valid > > sources */ > > + assert(has_valid_source); > > + > > You could assert 'exec_type != BRW_REGISTER_TYPE_HF || inst->dst.type == > BRW_REGISTER_TYPE_HF' so we remember to handle half-float conversions > here when they are enabled in the back-end. > OK, I am going to add a "TO-DO" comment together with this assert. > > + return exec_type; > > With this in place the get_exec_type_size(inst) helper seems redundant > since it should be equivalent to type_sz(get_exec_type(inst)). I think > this should replace PATCH 3. > OK, I am going to edit patch 3 to have this. > > +} > > + > > +static bool > > +supports_type_conversion(const fs_inst *inst) { > > + switch(inst->opcode) { > > + case BRW_OPCODE_MOV: > > + case SHADER_OPCODE_MOV_INDIRECT: > > + return true; > > + case BRW_OPCODE_SEL: > > + return inst->dst.type == get_exec_type(inst); > > + default: > > + /* FIXME: We assume the opcodes don't explicitly mentioned > > + * before just work fine with arbitrary conversions. > > + */ > > + return true; > > + } > > +} > > + > > bool > > fs_visitor::lower_d2x() > > { > > bool progress = false; > > > > - foreach_block_and_inst_safe(block, fs_inst, inst, cfg) { > > - if (inst->opcode != BRW_OPCODE_MOV) > > - continue; > > - > > - if (inst->dst.type != BRW_REGISTER_TYPE_F && > > - inst->dst.type != BRW_REGISTER_TYPE_D && > > - inst->dst.type != BRW_REGISTER_TYPE_UD) > > - continue; > > + foreach_block_and_inst(block, fs_inst, inst, cfg) { > > + const fs_builder ibld(this, block, inst); > > + fs_reg dst = inst->dst; > > + bool saturate = inst->saturate; > > > > - if (inst->src[0].type != BRW_REGISTER_TYPE_DF && > > - inst->src[0].type != BRW_REGISTER_TYPE_UQ && > > - inst->src[0].type != BRW_REGISTER_TYPE_Q) > > - continue; > > + if (supports_type_conversion(inst)) { > > + if (get_exec_type_size(inst) == 8 && type_sz(inst->dst.type) < > > 8) { > > + /* From the Broadwell PRM, 3D Media GPGPU, "Double Precision > > Float to > > + * Single Precision Float": > > + * > > + * The upper Dword of every Qword will be written with > > undefined > > + * value when converting DF to F. > > + * > > + * So we need to allocate a temporary that's two registers, > > and then do > > + * a strided MOV to get the lower DWord of every Qword that > > has the > > + * result. > > + */ > > + fs_reg temp = ibld.vgrf(inst->src[0].type); > > This should probably be using the execution type. Right. > > > + fs_reg strided_temp = subscript(temp, dst.type, 0); > > > > - assert(inst->dst.file == VGRF); > > - assert(inst->saturate == false); > > - fs_reg dst = inst->dst; > > + assert(inst->size_written == > > inst->dst.component_size(inst->exec_size)); > > + inst->dst = strided_temp; > > + inst->saturate = false; > > + /* As it is an strided destination, we write n-times more > > being n the > > + * size ratio between source and destination types. Update > > + * size_written accordingly. > > + */ > > + inst->size_written = > > inst->dst.component_size(inst->exec_size); > > + ibld.at(block, inst->next).MOV(dst, strided_temp)->saturate = > > saturate; > > > > - const fs_builder ibld(this, block, inst); > > + progress = true; > > + } > > + } else { > > + fs_reg temp0 = ibld.vgrf(inst->src[0].type); > > Same here. With these taken into account and the get_exec_type() helper > moved into PATCH 3 this patch is: > > Reviewed-by: Francisco Jerez <curroje...@riseup.net> > OK, thanks! Sam > > > > - /* From the Broadwell PRM, 3D Media GPGPU, "Double Precision Float to > > - * Single Precision Float": > > - * > > - * The upper Dword of every Qword will be written with undefined > > - * value when converting DF to F. > > - * > > - * So we need to allocate a temporary that's two registers, and then > > do > > - * a strided MOV to get the lower DWord of every Qword that has the > > - * result. > > - */ > > - fs_reg temp = ibld.vgrf(inst->src[0].type, 1); > > - fs_reg strided_temp = subscript(temp, inst->dst.type, 0); > > - ibld.MOV(strided_temp, inst->src[0]); > > - ibld.MOV(dst, strided_temp); > > + assert(inst->size_written == > > inst->dst.component_size(inst->exec_size)); > > + inst->dst = temp0; > > + /* As it is an strided destination, we write n-times more being n > > the > > + * size ratio between source and destination types. Update > > + * size_written accordingly. > > + */ > > + inst->size_written = inst->dst.component_size(inst->exec_size); > > + inst->saturate = false; > > + /* Now, do the conversion to original destination's type. In next > > iteration, > > + * we will lower it if it is a d2f conversion. > > + */ > > + ibld.at(block, inst->next).MOV(dst, temp0)->saturate = saturate; > > > > - inst->remove(block); > > - progress = true; > > + progress = true; > > + } > > } > > > > if (progress) > > -- > > 2.11.0
signature.asc
Description: PGP signature
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev