Generalize it to lower any unsupported narrower conversion. v2 (Curro): - Add supports_type_conversion() - Reuse existing intruction instead of cloning it. - Generalize d2x to narrower and equal size conversions.
Signed-off-by: Samuel Iglesias Gonsálvez <sigles...@igalia.com> --- src/intel/compiler/brw_fs.cpp | 11 ++-- src/intel/compiler/brw_fs_lower_d2x.cpp | 97 ++++++++++++++++++++++++--------- 2 files changed, 77 insertions(+), 31 deletions(-) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 6da23b17107..8612a83805d 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -5694,11 +5694,6 @@ fs_visitor::optimize() OPT(dead_code_eliminate); } - if (OPT(lower_d2x)) { - OPT(opt_copy_propagation); - OPT(dead_code_eliminate); - } - OPT(lower_simd_width); /* After SIMD lowering just in case we had to unroll the EOT send. */ @@ -5745,6 +5740,12 @@ fs_visitor::optimize() OPT(dead_code_eliminate); } + if (OPT(lower_d2x)) { + OPT(opt_copy_propagation); + OPT(dead_code_eliminate); + OPT(lower_simd_width); + } + lower_uniform_pull_constant_loads(); validate(); diff --git a/src/intel/compiler/brw_fs_lower_d2x.cpp b/src/intel/compiler/brw_fs_lower_d2x.cpp index a2db1154615..fa25d313dcd 100644 --- a/src/intel/compiler/brw_fs_lower_d2x.cpp +++ b/src/intel/compiler/brw_fs_lower_d2x.cpp @@ -27,47 +27,92 @@ using namespace brw; +static bool +supports_type_conversion(fs_inst *inst) { + switch(inst->opcode) { + case BRW_OPCODE_MOV: + case SHADER_OPCODE_MOV_INDIRECT: + return true; + case BRW_OPCODE_SEL: + return false; + default: + /* FIXME: We assume the opcodes don't explicitly mentioned + * before just work fine with arbitrary conversions. + */ + return true; + } +} + bool fs_visitor::lower_d2x() { bool progress = false; foreach_block_and_inst_safe(block, fs_inst, inst, cfg) { - if (inst->opcode != BRW_OPCODE_MOV) - continue; + bool inst_support_conversion = supports_type_conversion(inst); + bool supported_conversion = + inst_support_conversion && + (get_exec_type_size(inst) != 8 || + type_sz(inst->dst.type) > 4 || + type_sz(inst->dst.type) >= get_exec_type_size(inst)); - if (inst->dst.type != BRW_REGISTER_TYPE_F && - inst->dst.type != BRW_REGISTER_TYPE_D && - inst->dst.type != BRW_REGISTER_TYPE_UD) + /* If the conversion is supported or there is no conversion then + * do nothing. + */ + if (supported_conversion || + (!inst_support_conversion && inst->dst.type == inst->src[0].type) || + inst->dst.file == BAD_FILE || inst->src[0].file == BAD_FILE) continue; - if (inst->src[0].type != BRW_REGISTER_TYPE_DF && - inst->src[0].type != BRW_REGISTER_TYPE_UQ && - inst->src[0].type != BRW_REGISTER_TYPE_Q) - continue; + /* This pass only supports conversion to narrower or equal size types. */ + if (get_exec_type_size(inst) < type_sz(inst->dst.type)) + continue; - assert(inst->dst.file == VGRF); assert(inst->saturate == false); - fs_reg dst = inst->dst; const fs_builder ibld(this, block, inst); + fs_reg dst = inst->dst; - /* From the Broadwell PRM, 3D Media GPGPU, "Double Precision Float to - * Single Precision Float": - * - * The upper Dword of every Qword will be written with undefined - * value when converting DF to F. - * - * So we need to allocate a temporary that's two registers, and then do - * a strided MOV to get the lower DWord of every Qword that has the - * result. - */ - fs_reg temp = ibld.vgrf(inst->src[0].type, 1); - fs_reg strided_temp = subscript(temp, inst->dst.type, 0); - ibld.MOV(strided_temp, inst->src[0]); - ibld.MOV(dst, strided_temp); + if (inst_support_conversion && !supported_conversion) { + /* From the Broadwell PRM, 3D Media GPGPU, "Double Precision Float to + * Single Precision Float": + * + * The upper Dword of every Qword will be written with undefined + * value when converting DF to F. + * + * So we need to allocate a temporary that's two registers, and then do + * a strided MOV to get the lower DWord of every Qword that has the + * result. + */ + fs_reg temp = ibld.vgrf(inst->src[0].type, 1); + fs_reg strided_temp = subscript(temp, dst.type, 0); + + /* We clone the original instruction as we are going to modify it + * and emit another one after it. + */ + inst->dst = strided_temp; + /* As it is an strided destination, we write n-times more being n the + * size difference between source and destination types. Update + * size_written with the new destination. + */ + inst->size_written = inst->dst.component_size(inst->exec_size); + ibld.at(block, inst->next).MOV(dst, strided_temp); + } else { + fs_reg temp0 = ibld.vgrf(inst->src[0].type, 1); + fs_reg temp1 = ibld.vgrf(inst->src[0].type, 1); + fs_reg strided_temp1 = subscript(temp1, dst.type, 0); + + inst->dst = temp0; + /* As it is an strided destination, we write n-times more being n the + * size difference between source and destination types. Update + * size_written with the new destination. + */ + inst->size_written = inst->dst.component_size(inst->exec_size); - inst->remove(block); + /* Now, do the conversion to original destination's type. */ + fs_inst *mov = ibld.at(block, inst->next).MOV(strided_temp1, temp0); + ibld.at(block, mov->next).MOV(dst, strided_temp1); + } progress = true; } -- 2.11.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev