Module: Mesa
Branch: main
Commit: 7f05ea3793b68626958a10853561feae3e232392
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=7f05ea3793b68626958a10853561feae3e232392

Author: Rhys Perry <[email protected]>
Date:   Wed Apr 28 17:48:54 2021 +0100

nir: add nir_op_fmulz and nir_op_ffmaz

Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Timur Kristóf <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13436>

---

 src/compiler/nir/nir.h                |  3 +++
 src/compiler/nir/nir_opcodes.py       | 27 +++++++++++++++++++++++++++
 src/compiler/nir/nir_opt_algebraic.py |  3 +++
 3 files changed, 33 insertions(+)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index aae86579bc7..b4ee15eefb7 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -3446,6 +3446,9 @@ typedef struct nir_shader_compiler_options {
     */
    bool use_scoped_barrier;
 
+   /** Backend supports fmulz (and ffmaz if lower_ffma32=false) */
+   bool has_fmulz;
+
    /**
     * Is this the Intel vec4 backend?
     *
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index a104edc9882..c035c70ad9c 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -669,6 +669,20 @@ if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) {
    dst = src0 * src1;
 }
 """)
+
+# Unlike fmul, anything (even infinity or NaN) multiplied by zero is always zero.
+# fmulz(0.0, inf) and fmulz(0.0, nan) must be +/-0.0, even if
+# SIGNED_ZERO_INF_NAN_PRESERVE is not used. If SIGNED_ZERO_INF_NAN_PRESERVE is used, then
+# the result must be a positive zero if either operand is zero.
+binop("fmulz", tfloat32, _2src_commutative + associative, """
+if (src0 == 0.0 || src1 == 0.0)
+   dst = 0.0;
+else if (nir_is_rounding_mode_rtz(execution_mode, 32))
+   dst = _mesa_double_to_float_rtz((double)src0 * (double)src1);
+else
+   dst = src0 * src1;
+""")
+
 # low 32-bits of signed/unsigned integer multiply
 binop("imul", tint, _2src_commutative + associative, """
    /* Use 64-bit multiplies to prevent overflow of signed arithmetic */
@@ -960,6 +974,19 @@ if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) {
 }
 """)
 
+# Unlike ffma, anything (even infinity or NaN) multiplied by zero is always zero.
+# ffmaz(0.0, inf, src2) and ffmaz(0.0, nan, src2) must be +/-0.0 + src2, even if
+# SIGNED_ZERO_INF_NAN_PRESERVE is not used. If SIGNED_ZERO_INF_NAN_PRESERVE is used, then
+# the result must be a positive zero plus src2 if either src0 or src1 is zero.
+triop("ffmaz", tfloat32, _2src_commutative, """
+if (src0 == 0.0 || src1 == 0.0)
+   dst = 0.0 + src2;
+else if (nir_is_rounding_mode_rtz(execution_mode, 32))
+   dst = _mesa_float_fma_rtz(src0, src1, src2);
+else
+   dst = fmaf(src0, src1, src2);
+""")
+
 triop("flrp", tfloat, "", "src0 * (1 - src2) + src1 * src2")
 
 # Ternary addition
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index d5a9f7265bc..949e0f24278 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -330,10 +330,12 @@ optimizations.extend([
    (('ffma@16', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma16'),
    (('ffma@32', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma32'),
    (('ffma@64', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma64'),
+   (('ffmaz', a, b, c), ('fadd', ('fmulz', a, b), c), 'options->lower_ffma32'),
    # Always lower inexact ffma, because it will be fused back by late optimizations (nir_opt_algebraic_late).
    (('~ffma@16', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma16'),
    (('~ffma@32', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma32'),
    (('~ffma@64', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma64'),
+   (('~ffmaz', a, b, c), ('fadd', ('fmulz', a, b), c), 'options->fuse_ffma32'),
 
    (('~fmul', ('fadd', ('iand', ('ineg', ('b2i', 'a@bool')), ('fmul', b, c)), '#d'), '#e'),
     ('bcsel', a, ('fmul', ('fadd', ('fmul', b, c), d), e), ('fmul', d, e))),
@@ -2483,6 +2485,7 @@ late_optimizations = [
    (('~fadd@16', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma16'),
    (('~fadd@32', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma32'),
    (('~fadd@64', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma64'),
+   (('~fadd@32', ('fmulz', a, b), c), ('ffmaz', a, b, c), 'options->fuse_ffma32'),
 
    # Subtractions get lowered during optimization, so we need to recombine them
    (('fadd', a, ('fneg', 'b')), ('fsub', 'a', 'b'), 'options->has_fsub'),
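
Editor's note: the constant-folding snippets added in nir_opcodes.py above boil down to a
"zero wins" rule. Below is a minimal standalone C sketch of the same semantics; fmulz_ref
and ffmaz_ref are hypothetical names for illustration only (the real code is generated
from nir_opcodes.py), and the RTZ rounding path is omitted.

/* Reference semantics of the new opcodes: anything multiplied by zero,
 * including Inf and NaN, yields zero. Compile with: cc demo.c -lm */
#include <math.h>
#include <stdio.h>

static float fmulz_ref(float src0, float src1)
{
   if (src0 == 0.0f || src1 == 0.0f)
      return 0.0f;
   return src0 * src1;
}

static float ffmaz_ref(float src0, float src1, float src2)
{
   if (src0 == 0.0f || src1 == 0.0f)
      return 0.0f + src2;
   return fmaf(src0, src1, src2);
}

int main(void)
{
   printf("fmul:  0 * inf       = %f\n", 0.0f * INFINITY);            /* nan */
   printf("fmulz: 0 * inf       = %f\n", fmulz_ref(0.0f, INFINITY));  /* 0.000000 */
   printf("ffmaz: 0 * nan + 2.5 = %f\n", ffmaz_ref(0.0f, NAN, 2.5f)); /* 2.500000 */
   return 0;
}

Note that src0 == 0.0f is true for both +0.0 and -0.0, so a negative-zero operand also
takes the zero path.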
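
On the options side, a backend opts in via the flag this commit adds to
nir_shader_compiler_options. A hedged sketch of how a hypothetical driver might fill in
the relevant fields (every field besides has_fmulz already exists and is referenced in
the rules above):

/* Hypothetical backend options: advertise native fmulz/ffmaz support.
 * Only the fields relevant to this commit are shown. */
static const nir_shader_compiler_options backend_nir_options = {
   .has_fmulz = true,      /* backend implements fmulz (and ffmaz) */
   .lower_ffma32 = false,  /* per the nir.h comment, ffmaz requires lower_ffma32=false */
   .fuse_ffma32 = true,    /* let nir_opt_algebraic_late fuse fadd(fmulz) into ffmaz */
};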
