Module: Mesa
Branch: master
Commit: 6049dc1a9d5cb1a3dae063e52409028213d5492a
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=6049dc1a9d5cb1a3dae063e52409028213d5492a
Author: Rhys Perry <[email protected]>
Date:   Thu Sep 3 14:56:26 2020 +0100

aco: improve fsign selection

Idea from https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6284

fossil-db (Navi):
Totals from 4053 (2.95% of 137413) affected shaders:
SGPRs: 305810 -> 305906 (+0.03%); split: -0.01%, +0.04%
VGPRs: 249000 -> 249144 (+0.06%); split: -0.01%, +0.07%
CodeSize: 29967092 -> 29885768 (-0.27%); split: -0.27%, +0.00%
Instrs: 5749494 -> 5737971 (-0.20%); split: -0.20%, +0.00%
Cycles: 255028584 -> 254955444 (-0.03%); split: -0.04%, +0.01%

Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Daniel Schürmann <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6583>

---

 src/amd/compiler/aco_instruction_selection.cpp | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 1d543073f1b..2a2dc0687df 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -2127,17 +2127,15 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr) case nir_op_fsign: { Temp src = as_vgpr(ctx, get_alu_src(ctx, instr->src[0])); if (dst.regClass() == v2b) { - Temp one = bld.copy(bld.def(v1), Operand(0x3c00u)); - Temp minus_one = bld.copy(bld.def(v1), Operand(0xbc00u)); - Temp cond = bld.vopc(aco_opcode::v_cmp_nlt_f16, bld.hint_vcc(bld.def(bld.lm)), Operand(0u), src); - src = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), one, src, cond); - cond = bld.vopc(aco_opcode::v_cmp_le_f16, bld.hint_vcc(bld.def(bld.lm)), Operand(0u), src); - bld.vop2(aco_opcode::v_cndmask_b32, Definition(dst), minus_one, src, cond); + assert(ctx->program->chip_class >= GFX9); + /* replace negative zero with positive zero */ + src = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), Operand(0u), src); + src = bld.vop3(aco_opcode::v_med3_i16, bld.def(v2b), Operand((uint16_t)-1), src, 
Operand((uint16_t)1u)); + bld.vop1(aco_opcode::v_cvt_f16_i16, Definition(dst), src); } else if (dst.regClass() == v1) { - Temp cond = bld.vopc(aco_opcode::v_cmp_nlt_f32, bld.hint_vcc(bld.def(bld.lm)), Operand(0u), src); - src = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand(0x3f800000u), src, cond); - cond = bld.vopc(aco_opcode::v_cmp_le_f32, bld.hint_vcc(bld.def(bld.lm)), Operand(0u), src); - bld.vop2(aco_opcode::v_cndmask_b32, Definition(dst), Operand(0xbf800000u), src, cond); + src = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), Operand(0u), src); + src = bld.vop3(aco_opcode::v_med3_i32, bld.def(v1), Operand((uint32_t)-1), src, Operand(1u)); + bld.vop1(aco_opcode::v_cvt_f32_i32, Definition(dst), src); } else if (dst.regClass() == v2) { Temp cond = bld.vopc(aco_opcode::v_cmp_nlt_f64, bld.hint_vcc(bld.def(bld.lm)), Operand(0u), src); Temp tmp = bld.vop1(aco_opcode::v_mov_b32, bld.def(v1), Operand(0x3FF00000u)); _______________________________________________ mesa-commit mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-commit
