Module: Mesa
Branch: main
Commit: e6e1da8124940785920b038a14fdf78f37a1d03b
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e6e1da8124940785920b038a14fdf78f37a1d03b
Author: Pavel Ondračka <pavel.ondra...@gmail.com> Date: Wed Sep 20 14:56:14 2023 +0200 r300: lower ftrunc in NIR and remove the backend TRUNC lowering. Shader-db RV370: total instructions in shared programs: 82155 -> 82154 (<.01%) instructions in affected programs: 38 -> 37 (-2.63%) helped: 1 HURT: 0 total consts in shared programs: 80719 -> 80733 (0.02%) consts in affected programs: 2775 -> 2789 (0.50%) helped: 0 HURT: 14 Shader-db RV530: total presub in shared programs: 7676 -> 7702 (0.34%) presub in affected programs: 81 -> 107 (32.10%) helped: 0 HURT: 26 Reviewed-by: Filip Gawin <filip.ga...@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26816> --- src/gallium/drivers/r300/compiler/nir_to_rc.c | 4 +- .../drivers/r300/compiler/r300_nir_algebraic.py | 5 ++- src/gallium/drivers/r300/compiler/radeon_opcodes.c | 7 ---- src/gallium/drivers/r300/compiler/radeon_opcodes.h | 3 -- .../drivers/r300/compiler/radeon_program_alu.c | 49 ---------------------- src/gallium/drivers/r300/r300_tgsi_to_rc.c | 1 - 6 files changed, 7 insertions(+), 62 deletions(-) diff --git a/src/gallium/drivers/r300/compiler/nir_to_rc.c b/src/gallium/drivers/r300/compiler/nir_to_rc.c index 58d37cbfa93..81290e57838 100644 --- a/src/gallium/drivers/r300/compiler/nir_to_rc.c +++ b/src/gallium/drivers/r300/compiler/nir_to_rc.c @@ -2438,7 +2438,6 @@ const void *nir_to_rc_options(struct nir_shader *s, if (s->info.stage == MESA_SHADER_FRAGMENT) { NIR_PASS_V(s, r300_nir_prepare_presubtract); - NIR_PASS_V(s, r300_nir_clean_double_fneg); } NIR_PASS_V(s, nir_lower_int_to_float); @@ -2448,6 +2447,8 @@ const void *nir_to_rc_options(struct nir_shader *s, !options->lower_cmp && !options->lower_fabs); /* bool_to_float generates MOVs for b2f32 that we want to clean up. */ NIR_PASS_V(s, nir_copy_prop); + /* CSE cleanup after late ftrunc lowering. 
*/ + NIR_PASS_V(s, nir_opt_cse); /* At this point we need to clean; * a) fcsel_gt that come from the ftrunc lowering on R300, * b) all flavours of fcsels that read three different temp sources on R500. @@ -2459,6 +2460,7 @@ const void *nir_to_rc_options(struct nir_shader *s, NIR_PASS_V(s, r300_nir_lower_fcsel_r300); NIR_PASS_V(s, r300_nir_lower_flrp); } + NIR_PASS_V(s, r300_nir_clean_double_fneg); NIR_PASS_V(s, nir_opt_dce); nir_move_options move_all = diff --git a/src/gallium/drivers/r300/compiler/r300_nir_algebraic.py b/src/gallium/drivers/r300/compiler/r300_nir_algebraic.py index 1f3514791bc..cf13f30bef1 100644 --- a/src/gallium/drivers/r300/compiler/r300_nir_algebraic.py +++ b/src/gallium/drivers/r300/compiler/r300_nir_algebraic.py @@ -110,7 +110,10 @@ r300_nir_post_integer_lowering = [ # This actually checks for the lowered ffloor(a) = a - ffract(a) patterns. (('fadd(is_only_used_by_load_ubo_vec4)', a, ('fneg', ('ffract', a))), a), # This is a D3D9 pattern from Wine when shader wants ffloor instead of fround on register load. 
- (('fround_even(is_only_used_by_load_ubo_vec4)', ('fadd', a, ('fneg', ('ffract', a)))), a) + (('fround_even(is_only_used_by_load_ubo_vec4)', ('fadd', a, ('fneg', ('ffract', a)))), a), + # Lower ftrunc + (('ftrunc', 'a@32'), ('fcsel_ge', a, ('fadd', ('fabs', a), ('fneg', ('ffract', ('fabs', a)))), + ('fneg', ('fadd', ('fabs', a), ('fneg', ('ffract', ('fabs', a))))))) ] def main(): diff --git a/src/gallium/drivers/r300/compiler/radeon_opcodes.c b/src/gallium/drivers/r300/compiler/radeon_opcodes.c index 1458d03aac6..01f01aa8696 100644 --- a/src/gallium/drivers/r300/compiler/radeon_opcodes.c +++ b/src/gallium/drivers/r300/compiler/radeon_opcodes.c @@ -289,13 +289,6 @@ const struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { .HasDstReg = 1, .IsComponentwise = 1 }, - { - .Opcode = RC_OPCODE_TRUNC, - .Name = "TRUNC", - .NumSrcRegs = 1, - .HasDstReg = 1, - .IsComponentwise = 1 - }, { .Opcode = RC_OPCODE_TEX, .Name = "TEX", diff --git a/src/gallium/drivers/r300/compiler/radeon_opcodes.h b/src/gallium/drivers/r300/compiler/radeon_opcodes.h index 88d6f212ba2..19a9ff280bc 100644 --- a/src/gallium/drivers/r300/compiler/radeon_opcodes.h +++ b/src/gallium/drivers/r300/compiler/radeon_opcodes.h @@ -152,9 +152,6 @@ typedef enum { /** vec4 instruction: dst.c = src0.c - src1.c */ RC_OPCODE_SUB, - /** vec4 instruction: dst.c = (abs(src0.c) - fract(abs(src0.c))) * sgn(src0.c) */ - RC_OPCODE_TRUNC, - RC_OPCODE_TEX, RC_OPCODE_TXB, RC_OPCODE_TXD, diff --git a/src/gallium/drivers/r300/compiler/radeon_program_alu.c b/src/gallium/drivers/r300/compiler/radeon_program_alu.c index a56d81c62e7..85e8afb3ed4 100644 --- a/src/gallium/drivers/r300/compiler/radeon_program_alu.c +++ b/src/gallium/drivers/r300/compiler/radeon_program_alu.c @@ -212,42 +212,6 @@ static void transform_DP2(struct radeon_compiler* c, rc_remove_instruction(inst); } -static void transform_TRUNC(struct radeon_compiler* c, - struct rc_instruction* inst) -{ - /* Definition of trunc: - * trunc(x) = (abs(x) - fract(abs(x))) * 
sgn(x) - * - * The multiplication by sgn(x) can be simplified using CMP: - * y * sgn(x) = (x < 0 ? -y : y) - */ - - struct rc_src_register abs; - - if (c->is_r500 || c->type == RC_FRAGMENT_PROGRAM) { - abs = absolute(inst->U.I.SrcReg[0]); - } else { - /* abs isn't free on r300's and r400's vertex shader, - * so we want to avoid doing it twice - */ - int tmp = rc_find_free_temporary(c); - - emit2(c, inst->Prev, RC_OPCODE_MAX, NULL, dstregtmpmask(tmp, RC_MASK_XYZW), - srcregswz(inst->U.I.SrcReg[0].File, inst->U.I.SrcReg[0].Index, RC_SWIZZLE_XYZW), - negate(srcregswz(inst->U.I.SrcReg[0].File, inst->U.I.SrcReg[0].Index, RC_SWIZZLE_XYZW))); - abs = srcregswz(RC_FILE_TEMPORARY, tmp, inst->U.I.SrcReg[0].Swizzle); - - } - struct rc_dst_register dst = new_dst_reg(c, inst); - emit1(c, inst->Prev, RC_OPCODE_FRC, NULL, dst, abs); - emit2(c, inst->Prev, RC_OPCODE_ADD, NULL, dst, abs, - negate(srcreg(RC_FILE_TEMPORARY, dst.Index))); - emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg, inst->U.I.SrcReg[0], - negate(srcreg(RC_FILE_TEMPORARY, dst.Index)), srcreg(RC_FILE_TEMPORARY, dst.Index)); - - rc_remove_instruction(inst); -} - static void transform_LRP(struct radeon_compiler* c, struct rc_instruction* inst) { @@ -386,7 +350,6 @@ int radeonTransformALU( case RC_OPCODE_SLT: transform_SLT(c, inst); return 1; case RC_OPCODE_SNE: transform_SNE(c, inst); return 1; case RC_OPCODE_SUB: transform_SUB(c, inst); return 1; - case RC_OPCODE_TRUNC: transform_TRUNC(c, inst); return 1; default: return 0; } @@ -545,17 +508,6 @@ static void transform_r300_vertex_SLE(struct radeon_compiler* c, inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; } -static void transform_vertex_TRUNC(struct radeon_compiler* c, - struct rc_instruction* inst) -{ - struct rc_instruction *next = inst->Next; - - /* next->Prev is removed after each transformation and replaced - * by a new instruction. 
*/ - transform_TRUNC(c, next->Prev); - transform_r300_vertex_CMP(c, next->Prev); -} - /** * For use with rc_local_transform, this transforms non-native ALU * instructions of the r300 up to r500 vertex engine. @@ -586,7 +538,6 @@ int r300_transform_vertex_alu( } return 0; case RC_OPCODE_SUB: transform_SUB(c, inst); return 1; - case RC_OPCODE_TRUNC: transform_vertex_TRUNC(c, inst); return 1; default: return 0; } diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 82842c5fd67..afe809e05a2 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -81,7 +81,6 @@ static unsigned translate_opcode(unsigned opcode) case TGSI_OPCODE_ELSE: return RC_OPCODE_ELSE; case TGSI_OPCODE_ENDIF: return RC_OPCODE_ENDIF; case TGSI_OPCODE_ENDLOOP: return RC_OPCODE_ENDLOOP; - case TGSI_OPCODE_TRUNC: return RC_OPCODE_TRUNC; case TGSI_OPCODE_CONT: return RC_OPCODE_CONT; case TGSI_OPCODE_NOP: return RC_OPCODE_NOP; case TGSI_OPCODE_KILL_IF: return RC_OPCODE_KIL;