Module: Mesa
Branch: main
Commit: e1fecd83edcd6d6682acb14f8256b3436272856a
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=e1fecd83edcd6d6682acb14f8256b3436272856a

Author: Faith Ekstrand <faith.ekstr...@collabora.com>
Date:   Tue Dec 19 14:23:16 2023 -0600

nak/sm50: Add DMnMx and use it for fp64 fmin/fmax

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26587>

---

 src/nouveau/compiler/nak/encode_sm50.rs | 23 +++++++++++++++++++++++
 src/nouveau/compiler/nak/from_nir.rs    | 27 +++++++++++++++++++--------
 src/nouveau/compiler/nak/ir.rs          | 26 +++++++++++++++++++++++++-
 src/nouveau/compiler/nak/legalize.rs    |  6 ++++++
 4 files changed, 73 insertions(+), 9 deletions(-)

diff --git a/src/nouveau/compiler/nak/encode_sm50.rs b/src/nouveau/compiler/nak/encode_sm50.rs
index 6ef74b2fe9d..5a714fb81ec 100644
--- a/src/nouveau/compiler/nak/encode_sm50.rs
+++ b/src/nouveau/compiler/nak/encode_sm50.rs
@@ -1691,6 +1691,28 @@ impl SM50Instr {
         self.set_rnd_mode(50..52, op.rnd_mode);
     }
 
+    fn encode_dmnmx(&mut self, op: &OpDMnMx) { // Encode an fp64 min/max.
+        match &op.srcs[1].src_ref { // opcode depends on srcs[1]'s kind
+            SrcRef::Zero | SrcRef::Reg(_) => { // register form
+                self.set_opcode(0x5c50);
+                self.set_reg_fmod_src(20..28, 49, 45, op.srcs[1]);
+            }
+            SrcRef::Imm32(imm32) => { // immediate form
+                self.set_opcode(0x3850);
+                self.set_src_imm_f20(20..39, 56, *imm32);
+            }
+            SrcRef::CBuf(_) => { // CBuf form
+                self.set_opcode(0x4c50);
+                self.set_cb_fmod_src(20..39, 49, 45, op.srcs[1]);
+            }
+            src => panic!("Unsupported src type for DMNMX: {src}"),
+        }
+
+        self.set_reg_fmod_src(8..16, 46, 48, op.srcs[0]); // srcs[0]: reg form
+        self.set_dst(op.dst);
+        self.set_pred_src(39..42, 42, op.min); // selects min vs. max
+    }
+
     fn encode_dmul(&mut self, op: &OpDMul) {
         match &op.srcs[1].src_ref {
             SrcRef::Zero | SrcRef::Reg(_) => {
@@ -1872,6 +1894,7 @@ impl SM50Instr {
             Op::MuFu(op) => si.encode_mufu(&op),
             Op::DAdd(op) => si.encode_dadd(&op),
             Op::DFma(op) => si.encode_dfma(&op),
+            Op::DMnMx(op) => si.encode_dmnmx(&op),
             Op::DMul(op) => si.encode_dmul(&op),
             Op::DSetP(op) => si.encode_dsetp(&op),
             Op::IAbs(op) => si.encode_iabs(&op),
diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs
index f813e52ca2c..0b031c2eddb 100644
--- a/src/nouveau/compiler/nak/from_nir.rs
+++ b/src/nouveau/compiler/nak/from_nir.rs
@@ -782,14 +782,25 @@ impl<'a> ShaderFromNir<'a> {
                 b.mufu(MuFuOp::Log2, srcs[0])
             }
             nir_op_fmax | nir_op_fmin => {
-                assert!(alu.def.bit_size() == 32);
-                let dst = b.alloc_ssa(RegFile::GPR, 1);
-                b.push_op(OpFMnMx {
-                    dst: dst.into(),
-                    srcs: [srcs[0], srcs[1]],
-                    min: (alu.op == nir_op_fmin).into(),
-                    ftz: self.float_ctl.fp32.ftz,
-                });
+                let dst;
+                if alu.def.bit_size() == 64 {
+                    dst = b.alloc_ssa(RegFile::GPR, 2);
+                    b.push_op(OpDMnMx {
+                        dst: dst.into(),
+                        srcs: [srcs[0], srcs[1]],
+                        min: (alu.op == nir_op_fmin).into(),
+                    });
+                } else if alu.def.bit_size() == 32 {
+                    dst = b.alloc_ssa(RegFile::GPR, 1);
+                    b.push_op(OpFMnMx {
+                        dst: dst.into(),
+                        srcs: [srcs[0], srcs[1]],
+                        min: (alu.op == nir_op_fmin).into(),
+                        ftz: self.float_ctl.fp32.ftz,
+                    });
+                } else {
+                    panic!("Unsupported float type: f{}", alu.def.bit_size());
+                }
                 dst
             }
             nir_op_fmul => {
diff --git a/src/nouveau/compiler/nak/ir.rs b/src/nouveau/compiler/nak/ir.rs
index 67b8abe22d0..c3455cbd9b4 100644
--- a/src/nouveau/compiler/nak/ir.rs
+++ b/src/nouveau/compiler/nak/ir.rs
@@ -2529,6 +2529,25 @@ impl DisplayOp for OpDFma {
 }
 impl_display_for_op!(OpDFma);
 
+#[repr(C)]
+#[derive(SrcsAsSlice, DstsAsSlice)]
+pub struct OpDMnMx { // fp64 min/max op; from_nir emits it for 64-bit fmin/fmax
+    pub dst: Dst, // result; from_nir allocates two GPRs for it
+
+    #[src_type(F64)]
+    pub srcs: [Src; 2], // the two F64 operands
+
+    #[src_type(Pred)]
+    pub min: Src, // predicate; from_nir sets it true for fmin, false for fmax
+}
+
+impl DisplayOp for OpDMnMx { // prints "dmnmx", then both srcs, then min
+    fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "dmnmx {} {} {}", self.srcs[0], self.srcs[1], self.min)
+    }
+}
+impl_display_for_op!(OpDMnMx);
+
 #[repr(C)]
 #[derive(SrcsAsSlice, DstsAsSlice)]
 pub struct OpDSetP {
@@ -4698,6 +4717,7 @@ pub enum Op {
     FSwzAdd(OpFSwzAdd),
     DAdd(OpDAdd),
     DFma(OpDFma),
+    DMnMx(OpDMnMx),
     DMul(OpDMul),
     DSetP(OpDSetP),
     Brev(OpBrev),
@@ -5137,7 +5157,11 @@ impl Instr {
             Op::MuFu(_) => false,
 
             // Double-precision float ALU
-            Op::DAdd(_) | Op::DFma(_) | Op::DMul(_) | Op::DSetP(_) => false,
+            Op::DAdd(_)
+            | Op::DFma(_)
+            | Op::DMnMx(_)
+            | Op::DMul(_)
+            | Op::DSetP(_) => false,
 
             // Integer ALU
             Op::Brev(_) | Op::Flo(_) | Op::PopC(_) => false,
diff --git a/src/nouveau/compiler/nak/legalize.rs b/src/nouveau/compiler/nak/legalize.rs
index 4878d7e806b..46721d2802c 100644
--- a/src/nouveau/compiler/nak/legalize.rs
+++ b/src/nouveau/compiler/nak/legalize.rs
@@ -241,6 +241,12 @@ fn legalize_sm50_instr(
             copy_alu_src_if_f20_overflow(b, src1, SrcType::F64);
             copy_alu_src_if_not_reg(b, src2, SrcType::F64);
         }
+        Op::DMnMx(op) => {
+            let [ref mut src0, ref mut src1] = op.srcs;
+            swap_srcs_if_not_reg(src0, src1);
+            copy_alu_src_if_not_reg(b, src0, SrcType::F64);
+            copy_alu_src_if_f20_overflow(b, src1, SrcType::F64);
+        }
         Op::DMul(op) => {
             let [ref mut src0, ref mut src1] = op.srcs;
             copy_alu_src_if_fabs(b, src0, SrcType::F64);

Reply via email to