Module: Mesa
Branch: main
Commit: e6e1da8124940785920b038a14fdf78f37a1d03b
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=e6e1da8124940785920b038a14fdf78f37a1d03b

Author: Pavel Ondračka <pavel.ondra...@gmail.com>
Date:   Wed Sep 20 14:56:14 2023 +0200

r300: lower ftrunc in NIR

and remove the backend TRUNC lowering.

Shader-db RV370:
total instructions in shared programs: 82155 -> 82154 (<.01%)
instructions in affected programs: 38 -> 37 (-2.63%)
helped: 1
HURT: 0
total consts in shared programs: 80719 -> 80733 (0.02%)
consts in affected programs: 2775 -> 2789 (0.50%)
helped: 0
HURT: 14

Shader-db RV530:
total presub in shared programs: 7676 -> 7702 (0.34%)
presub in affected programs: 81 -> 107 (32.10%)
helped: 0
HURT: 26

Reviewed-by: Filip Gawin <filip.ga...@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26816>

---

 src/gallium/drivers/r300/compiler/nir_to_rc.c      |  4 +-
 .../drivers/r300/compiler/r300_nir_algebraic.py    |  5 ++-
 src/gallium/drivers/r300/compiler/radeon_opcodes.c |  7 ----
 src/gallium/drivers/r300/compiler/radeon_opcodes.h |  3 --
 .../drivers/r300/compiler/radeon_program_alu.c     | 49 ----------------------
 src/gallium/drivers/r300/r300_tgsi_to_rc.c         |  1 -
 6 files changed, 7 insertions(+), 62 deletions(-)

diff --git a/src/gallium/drivers/r300/compiler/nir_to_rc.c b/src/gallium/drivers/r300/compiler/nir_to_rc.c
index 58d37cbfa93..81290e57838 100644
--- a/src/gallium/drivers/r300/compiler/nir_to_rc.c
+++ b/src/gallium/drivers/r300/compiler/nir_to_rc.c
@@ -2438,7 +2438,6 @@ const void *nir_to_rc_options(struct nir_shader *s,
 
    if (s->info.stage == MESA_SHADER_FRAGMENT) {
       NIR_PASS_V(s, r300_nir_prepare_presubtract);
-      NIR_PASS_V(s, r300_nir_clean_double_fneg);
    }
 
    NIR_PASS_V(s, nir_lower_int_to_float);
@@ -2448,6 +2447,8 @@ const void *nir_to_rc_options(struct nir_shader *s,
               !options->lower_cmp && !options->lower_fabs);
    /* bool_to_float generates MOVs for b2f32 that we want to clean up. */
    NIR_PASS_V(s, nir_copy_prop);
+   /* CSE cleanup after late ftrunc lowering. */
+   NIR_PASS_V(s, nir_opt_cse);
    /* At this point we need to clean;
     *  a) fcsel_gt that come from the ftrunc lowering on R300,
     *  b) all flavours of fcsels that read three different temp sources on R500.
@@ -2459,6 +2460,7 @@ const void *nir_to_rc_options(struct nir_shader *s,
          NIR_PASS_V(s, r300_nir_lower_fcsel_r300);
       NIR_PASS_V(s, r300_nir_lower_flrp);
    }
+   NIR_PASS_V(s, r300_nir_clean_double_fneg);
    NIR_PASS_V(s, nir_opt_dce);
 
    nir_move_options move_all =
diff --git a/src/gallium/drivers/r300/compiler/r300_nir_algebraic.py b/src/gallium/drivers/r300/compiler/r300_nir_algebraic.py
index 1f3514791bc..cf13f30bef1 100644
--- a/src/gallium/drivers/r300/compiler/r300_nir_algebraic.py
+++ b/src/gallium/drivers/r300/compiler/r300_nir_algebraic.py
@@ -110,7 +110,10 @@ r300_nir_post_integer_lowering = [
         # This actually checks for the lowered ffloor(a) = a - ffract(a) patterns.
         (('fadd(is_only_used_by_load_ubo_vec4)', a, ('fneg', ('ffract', a))), a),
         # This is a D3D9 pattern from Wine when shader wants ffloor instead of fround on register load.
-        (('fround_even(is_only_used_by_load_ubo_vec4)', ('fadd', a, ('fneg', ('ffract', a)))), a)
+        (('fround_even(is_only_used_by_load_ubo_vec4)', ('fadd', a, ('fneg', ('ffract', a)))), a),
+        # Lower ftrunc
+        (('ftrunc', 'a@32'), ('fcsel_ge', a, ('fadd', ('fabs', a), ('fneg', ('ffract', ('fabs', a)))),
+                                     ('fneg', ('fadd', ('fabs', a), ('fneg', ('ffract', ('fabs', a)))))))
 ]
 
 def main():
diff --git a/src/gallium/drivers/r300/compiler/radeon_opcodes.c b/src/gallium/drivers/r300/compiler/radeon_opcodes.c
index 1458d03aac6..01f01aa8696 100644
--- a/src/gallium/drivers/r300/compiler/radeon_opcodes.c
+++ b/src/gallium/drivers/r300/compiler/radeon_opcodes.c
@@ -289,13 +289,6 @@ const struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
                .HasDstReg = 1,
                .IsComponentwise = 1
        },
-       {
-               .Opcode = RC_OPCODE_TRUNC,
-               .Name = "TRUNC",
-               .NumSrcRegs = 1,
-               .HasDstReg = 1,
-               .IsComponentwise = 1
-       },
        {
                .Opcode = RC_OPCODE_TEX,
                .Name = "TEX",
diff --git a/src/gallium/drivers/r300/compiler/radeon_opcodes.h b/src/gallium/drivers/r300/compiler/radeon_opcodes.h
index 88d6f212ba2..19a9ff280bc 100644
--- a/src/gallium/drivers/r300/compiler/radeon_opcodes.h
+++ b/src/gallium/drivers/r300/compiler/radeon_opcodes.h
@@ -152,9 +152,6 @@ typedef enum {
        /** vec4 instruction: dst.c = src0.c - src1.c */
        RC_OPCODE_SUB,
 
-       /** vec4 instruction: dst.c = (abs(src0.c) - fract(abs(src0.c))) * sgn(src0.c) */
-       RC_OPCODE_TRUNC,
-
        RC_OPCODE_TEX,
        RC_OPCODE_TXB,
        RC_OPCODE_TXD,
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_alu.c b/src/gallium/drivers/r300/compiler/radeon_program_alu.c
index a56d81c62e7..85e8afb3ed4 100644
--- a/src/gallium/drivers/r300/compiler/radeon_program_alu.c
+++ b/src/gallium/drivers/r300/compiler/radeon_program_alu.c
@@ -212,42 +212,6 @@ static void transform_DP2(struct radeon_compiler* c,
        rc_remove_instruction(inst);
 }
 
-static void transform_TRUNC(struct radeon_compiler* c,
-       struct rc_instruction* inst)
-{
-       /* Definition of trunc:
-        *   trunc(x) = (abs(x) - fract(abs(x))) * sgn(x)
-        *
-        * The multiplication by sgn(x) can be simplified using CMP:
-        *   y * sgn(x) = (x < 0 ? -y : y)
-        */
-        
-       struct rc_src_register abs;
-       
-       if (c->is_r500 || c->type == RC_FRAGMENT_PROGRAM) {
-               abs = absolute(inst->U.I.SrcReg[0]);
-       } else {
-               /* abs isn't free on r300's and r400's vertex shader,
-                *  so we want to avoid doing it twice
-                */
-               int tmp = rc_find_free_temporary(c);
-
-               emit2(c, inst->Prev, RC_OPCODE_MAX, NULL, dstregtmpmask(tmp, RC_MASK_XYZW),
-                         srcregswz(inst->U.I.SrcReg[0].File, inst->U.I.SrcReg[0].Index, RC_SWIZZLE_XYZW),
-                     negate(srcregswz(inst->U.I.SrcReg[0].File, inst->U.I.SrcReg[0].Index, RC_SWIZZLE_XYZW)));
-               abs = srcregswz(RC_FILE_TEMPORARY, tmp, inst->U.I.SrcReg[0].Swizzle);
-
-       }
-       struct rc_dst_register dst = new_dst_reg(c, inst);
-       emit1(c, inst->Prev, RC_OPCODE_FRC, NULL, dst, abs);
-       emit2(c, inst->Prev, RC_OPCODE_ADD, NULL, dst, abs,
-             negate(srcreg(RC_FILE_TEMPORARY, dst.Index)));
-       emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg, inst->U.I.SrcReg[0],
-             negate(srcreg(RC_FILE_TEMPORARY, dst.Index)), srcreg(RC_FILE_TEMPORARY, dst.Index));
-
-       rc_remove_instruction(inst);
-}
-
 static void transform_LRP(struct radeon_compiler* c,
        struct rc_instruction* inst)
 {
@@ -386,7 +350,6 @@ int radeonTransformALU(
        case RC_OPCODE_SLT: transform_SLT(c, inst); return 1;
        case RC_OPCODE_SNE: transform_SNE(c, inst); return 1;
        case RC_OPCODE_SUB: transform_SUB(c, inst); return 1;
-       case RC_OPCODE_TRUNC: transform_TRUNC(c, inst); return 1;
        default:
                return 0;
        }
@@ -545,17 +508,6 @@ static void transform_r300_vertex_SLE(struct radeon_compiler* c,
        inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
 }
 
-static void transform_vertex_TRUNC(struct radeon_compiler* c,
-       struct rc_instruction* inst)
-{
-       struct rc_instruction *next = inst->Next;
-
-       /* next->Prev is removed after each transformation and replaced
-        * by a new instruction. */
-       transform_TRUNC(c, next->Prev);
-       transform_r300_vertex_CMP(c, next->Prev);
-}
-
 /**
  * For use with rc_local_transform, this transforms non-native ALU
  * instructions of the r300 up to r500 vertex engine.
@@ -586,7 +538,6 @@ int r300_transform_vertex_alu(
                }
                return 0;
        case RC_OPCODE_SUB: transform_SUB(c, inst); return 1;
-       case RC_OPCODE_TRUNC: transform_vertex_TRUNC(c, inst); return 1;
        default:
                return 0;
        }
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
index 82842c5fd67..afe809e05a2 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -81,7 +81,6 @@ static unsigned translate_opcode(unsigned opcode)
         case TGSI_OPCODE_ELSE: return RC_OPCODE_ELSE;
         case TGSI_OPCODE_ENDIF: return RC_OPCODE_ENDIF;
         case TGSI_OPCODE_ENDLOOP: return RC_OPCODE_ENDLOOP;
-        case TGSI_OPCODE_TRUNC: return RC_OPCODE_TRUNC;
         case TGSI_OPCODE_CONT: return RC_OPCODE_CONT;
         case TGSI_OPCODE_NOP: return RC_OPCODE_NOP;
         case TGSI_OPCODE_KILL_IF: return RC_OPCODE_KIL;

Reply via email to