Module: Mesa Branch: master Commit: 79f1a7ae28c37f77e08e550cd077959a2a1f8341 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=79f1a7ae28c37f77e08e550cd077959a2a1f8341
Author: Antia Puentes <[email protected]> Date: Wed Aug 5 15:57:33 2015 +0200 i965/vec4: Fix saturation errors when coalescing registers If the register types do not match and the instruction that contains the final destination is saturated, register coalescing generated non-equivalent code. This did not happen when using IR because types usually matched, but it is visible in nir-vec4. For example, mov vgrf7:D vgrf2:D mov.sat m4:F vgrf7:F is coalesced to: mov.sat m4:D vgrf2:D The patch prevents coalescing in such a scenario, unless the instruction we want to coalesce into is a MOV (without type conversion implied). In that case, the patch sets the register types to the type of the final destination. Shader-db results in HSW (only vec4 instructions shown): total instructions in shared programs: 1754415 -> 1754416 (0.00%) instructions in affected programs: 74 -> 75 (1.35%) helped: 0 HURT: 1 GAINED: 0 LOST: 0 Only one extra instruction in one of the shaders, which comes from eliminating a saturation error by preventing register coalescing. Cc: "10.6 11.0" <[email protected]> Reviewed-by: Jason Ekstrand <[email protected]> --- src/mesa/drivers/dri/i965/brw_vec4.cpp | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 9d863c2..181768b 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1065,6 +1065,17 @@ vec4_visitor::opt_register_coalesce() } } + /* This doesn't handle saturation on the instruction we + * want to coalesce away if the register types do not match. + * But if scan_inst is a non type-converting 'mov', we can fix + * the types later. + */ + if (inst->saturate && + inst->dst.type != scan_inst->dst.type && + !(scan_inst->opcode == BRW_OPCODE_MOV && + scan_inst->dst.type == scan_inst->src[0].type)) + break; + /* If we can't handle the swizzle, bail. 
*/ if (!scan_inst->can_reswizzle(inst->dst.writemask, inst->src[0].swizzle, @@ -1142,6 +1153,16 @@ vec4_visitor::opt_register_coalesce() scan_inst->dst.file = inst->dst.file; scan_inst->dst.reg = inst->dst.reg; scan_inst->dst.reg_offset = inst->dst.reg_offset; + if (inst->saturate && + inst->dst.type != scan_inst->dst.type) { + /* If we have reached this point, scan_inst is a non + * type-converting 'mov' and we can modify its register types + * to match the ones in inst. Otherwise, we could have an + * incorrect saturation result. + */ + scan_inst->dst.type = inst->dst.type; + scan_inst->src[0].type = inst->src[0].type; + } scan_inst->saturate |= inst->saturate; } scan_inst = (vec4_instruction *)scan_inst->next; _______________________________________________ mesa-commit mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/mesa-commit
