Module: Mesa
Branch: main
Commit: eef7a117a1a1b045444a520a6dfc2a43069ae1fc
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=eef7a117a1a1b045444a520a6dfc2a43069ae1fc

Author: Jordan Justen <[email protected]>
Date:   Fri Mar 31 16:52:50 2023 -0700

intel/compiler: Support fmul_fsign opt for fp64 when int64 isn't supported

MTL support fp64, but not int64. The fsign(double(x))*FOO optimization
would try to use a 64-bit int xor operation to conditionally toggle
the sign bit off the result.

Since this only affects high bit of the result, we can do a 32-bit
move of the low dword, and a 32-bit xor on the high dword.

Fixes 
dEQP-VK.spirv_assembly.instruction.compute.float_controls.fp64.input_args.modf_denorm_flush_to_zero
on MTL.

Signed-off-by: Jordan Justen <[email protected]>
Reviewed-by: Ian Romanick <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22259>

---

 src/intel/compiler/brw_fs_nir.cpp | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/src/intel/compiler/brw_fs_nir.cpp 
b/src/intel/compiler/brw_fs_nir.cpp
index d7415526d2e..9f5a8496cb7 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -864,15 +864,24 @@ fs_visitor::emit_fsign(const fs_builder &bld, const 
nir_alu_instr *instr,
          set_predicate(BRW_PREDICATE_NORMAL,
                        bld.OR(r, r, brw_imm_ud(0x3ff00000u)));
       } else {
-         /* This could be done better in some cases.  If the scale is an
-          * immediate with the low 32-bits all 0, emitting a separate XOR and
-          * OR would allow an algebraic optimization to remove the OR.  There
-          * are currently zero instances of fsign(double(x))*IMM in shader-db
-          * or any test suite, so it is hard to care at this time.
-          */
-         fs_reg result_int64 = retype(result, BRW_REGISTER_TYPE_UQ);
-         inst = bld.XOR(result_int64, result_int64,
-                        retype(op[1], BRW_REGISTER_TYPE_UQ));
+         if (devinfo->has_64bit_int) {
+            /* This could be done better in some cases.  If the scale is an
+             * immediate with the low 32-bits all 0, emitting a separate XOR 
and
+             * OR would allow an algebraic optimization to remove the OR.  
There
+             * are currently zero instances of fsign(double(x))*IMM in 
shader-db
+             * or any test suite, so it is hard to care at this time.
+             */
+            fs_reg result_int64 = retype(result, BRW_REGISTER_TYPE_UQ);
+            inst = bld.XOR(result_int64, result_int64,
+                           retype(op[1], BRW_REGISTER_TYPE_UQ));
+         } else {
+            fs_reg result_int64 = retype(result, BRW_REGISTER_TYPE_UQ);
+            bld.MOV(subscript(result_int64, BRW_REGISTER_TYPE_UD, 0),
+                    subscript(op[1], BRW_REGISTER_TYPE_UD, 0));
+            bld.XOR(subscript(result_int64, BRW_REGISTER_TYPE_UD, 1),
+                    subscript(result_int64, BRW_REGISTER_TYPE_UD, 1),
+                    subscript(op[1], BRW_REGISTER_TYPE_UD, 1));
+         }
       }
    }
 }

Reply via email to