Module: Mesa Branch: main Commit: 21304b772c285a9c90900ac9b2ef25202b7aa1aa URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=21304b772c285a9c90900ac9b2ef25202b7aa1aa
Author: Rhys Perry <[email protected]> Date: Mon Jan 17 17:54:47 2022 +0000 aco: apply clamp to v_fma_mix fossil-db (Sienna Cichlid): Totals from 2536 (1.88% of 134913) affected shaders: CodeSize: 17314568 -> 17282960 (-0.18%) Instrs: 3191438 -> 3187487 (-0.12%) Latency: 59465090 -> 59407885 (-0.10%) InvThroughput: 10271466 -> 10260512 (-0.11%) fossil-db (Navi): Totals from 2512 (1.86% of 134913) affected shaders: CodeSize: 17194700 -> 17173396 (-0.12%) Instrs: 3215093 -> 3212430 (-0.08%) Latency: 60174315 -> 60142593 (-0.05%) InvThroughput: 9491103 -> 9483979 (-0.08%) fossil-db (Vega): Totals from 2512 (1.86% of 135048) affected shaders: CodeSize: 17186776 -> 17165472 (-0.12%) Instrs: 3311166 -> 3308503 (-0.08%) Latency: 65737409 -> 65716096 (-0.03%) InvThroughput: 21735857 -> 21719792 (-0.07%) Signed-off-by: Rhys Perry <[email protected]> Reviewed-by: Daniel Schürmann <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14769> --- src/amd/compiler/aco_optimizer.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index d99a97e7d9e..7109c2e4f7b 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -3017,11 +3017,13 @@ apply_omod_clamp(opt_ctx& ctx, aco_ptr<Instruction>& instr) return false; bool can_vop3 = can_use_VOP3(ctx, instr); - if (!instr->isSDWA() && !can_vop3) + bool is_mad_mix = + instr->opcode == aco_opcode::v_fma_mix_f32 || instr->opcode == aco_opcode::v_fma_mixlo_f16; + if (!instr->isSDWA() && !is_mad_mix && !can_vop3) return false; - /* omod flushes -0 to +0 and has no effect if denormals are enabled */ - bool can_use_omod = (can_vop3 || ctx.program->chip_class >= GFX9); /* SDWA omod is GFX9+ */ + /* omod flushes -0 to +0 and has no effect if denormals are enabled. SDWA omod is GFX9+. */ + bool can_use_omod = (can_vop3 || ctx.program->chip_class >= GFX9) && !instr->isVOP3P(); if (instr->definitions[0].bytes() == 4) can_use_omod = can_use_omod && ctx.fp_mode.denorm32 == 0 && !ctx.fp_mode.preserve_signed_zero_inf_nan32; @@ -3048,6 +3050,9 @@ apply_omod_clamp(opt_ctx& ctx, aco_ptr<Instruction>& instr) if (instr->isSDWA()) { if (!apply_omod_clamp_helper(ctx, &instr->sdwa(), def_info)) return false; + } else if (instr->isVOP3P()) { + assert(def_info.is_clamp()); + instr->vop3p().clamp = true; } else { to_VOP3(ctx, instr); if (!apply_omod_clamp_helper(ctx, &instr->vop3(), def_info))
