Module: Mesa
Branch: main
Commit: e1fecd83edcd6d6682acb14f8256b3436272856a
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e1fecd83edcd6d6682acb14f8256b3436272856a
Author: Faith Ekstrand <faith.ekstr...@collabora.com>
Date: Tue Dec 19 14:23:16 2023 -0600

nak/sm50: Add DMnMx and use it for fp64 fmin/fmax

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26587>

---

 src/nouveau/compiler/nak/encode_sm50.rs | 23 +++++++++++++++++++++++
 src/nouveau/compiler/nak/from_nir.rs    | 27 +++++++++++++++++++--------
 src/nouveau/compiler/nak/ir.rs          | 26 +++++++++++++++++++++++++-
 src/nouveau/compiler/nak/legalize.rs    |  6 ++++++
 4 files changed, 73 insertions(+), 9 deletions(-)

diff --git a/src/nouveau/compiler/nak/encode_sm50.rs b/src/nouveau/compiler/nak/encode_sm50.rs
index 6ef74b2fe9d..5a714fb81ec 100644
--- a/src/nouveau/compiler/nak/encode_sm50.rs
+++ b/src/nouveau/compiler/nak/encode_sm50.rs
@@ -1691,6 +1691,28 @@ impl SM50Instr {
         self.set_rnd_mode(50..52, op.rnd_mode);
     }
 
+    fn encode_dmnmx(&mut self, op: &OpDMnMx) {
+        match &op.srcs[1].src_ref {
+            SrcRef::Zero | SrcRef::Reg(_) => {
+                self.set_opcode(0x5c50);
+                self.set_reg_fmod_src(20..28, 49, 45, op.srcs[1]);
+            }
+            SrcRef::Imm32(imm32) => {
+                self.set_opcode(0x3850);
+                self.set_src_imm_f20(20..39, 56, *imm32);
+            }
+            SrcRef::CBuf(_) => {
+                self.set_opcode(0x4c50);
+                self.set_cb_fmod_src(20..39, 49, 45, op.srcs[1]);
+            }
+            src => panic!("Unsupported src type for FMNMX: {src}"),
+        }
+
+        self.set_reg_fmod_src(8..16, 46, 48, op.srcs[0]);
+        self.set_dst(op.dst);
+        self.set_pred_src(39..42, 42, op.min);
+    }
+
     fn encode_dmul(&mut self, op: &OpDMul) {
         match &op.srcs[1].src_ref {
             SrcRef::Zero | SrcRef::Reg(_) => {
@@ -1872,6 +1894,7 @@ impl SM50Instr {
             Op::MuFu(op) => si.encode_mufu(&op),
             Op::DAdd(op) => si.encode_dadd(&op),
             Op::DFma(op) => si.encode_dfma(&op),
+            Op::DMnMx(op) => si.encode_dmnmx(&op),
             Op::DMul(op) => si.encode_dmul(&op),
             Op::DSetP(op) => si.encode_dsetp(&op),
             Op::IAbs(op) => si.encode_iabs(&op),
diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs
index f813e52ca2c..0b031c2eddb 100644
--- a/src/nouveau/compiler/nak/from_nir.rs
+++ b/src/nouveau/compiler/nak/from_nir.rs
@@ -782,14 +782,25 @@ impl<'a> ShaderFromNir<'a> {
                 b.mufu(MuFuOp::Log2, srcs[0])
             }
             nir_op_fmax | nir_op_fmin => {
-                assert!(alu.def.bit_size() == 32);
-                let dst = b.alloc_ssa(RegFile::GPR, 1);
-                b.push_op(OpFMnMx {
-                    dst: dst.into(),
-                    srcs: [srcs[0], srcs[1]],
-                    min: (alu.op == nir_op_fmin).into(),
-                    ftz: self.float_ctl.fp32.ftz,
-                });
+                let dst;
+                if alu.def.bit_size() == 64 {
+                    dst = b.alloc_ssa(RegFile::GPR, 2);
+                    b.push_op(OpDMnMx {
+                        dst: dst.into(),
+                        srcs: [srcs[0], srcs[1]],
+                        min: (alu.op == nir_op_fmin).into(),
+                    });
+                } else if alu.def.bit_size() == 32 {
+                    dst = b.alloc_ssa(RegFile::GPR, 1);
+                    b.push_op(OpFMnMx {
+                        dst: dst.into(),
+                        srcs: [srcs[0], srcs[1]],
+                        min: (alu.op == nir_op_fmin).into(),
+                        ftz: self.float_ctl.fp32.ftz,
+                    });
+                } else {
+                    panic!("Unsupported float type: f{}", alu.def.bit_size());
+                }
                 dst
             }
             nir_op_fmul => {
diff --git a/src/nouveau/compiler/nak/ir.rs b/src/nouveau/compiler/nak/ir.rs
index 67b8abe22d0..c3455cbd9b4 100644
--- a/src/nouveau/compiler/nak/ir.rs
+++ b/src/nouveau/compiler/nak/ir.rs
@@ -2529,6 +2529,25 @@ impl DisplayOp for OpDFma {
 }
 impl_display_for_op!(OpDFma);
 
+#[repr(C)]
+#[derive(SrcsAsSlice, DstsAsSlice)]
+pub struct OpDMnMx {
+    pub dst: Dst,
+
+    #[src_type(F64)]
+    pub srcs: [Src; 2],
+
+    #[src_type(Pred)]
+    pub min: Src,
+}
+
+impl DisplayOp for OpDMnMx {
+    fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "dmnmx {} {} {}", self.srcs[0], self.srcs[1], self.min)
+    }
+}
+impl_display_for_op!(OpDMnMx);
+
 #[repr(C)]
 #[derive(SrcsAsSlice, DstsAsSlice)]
 pub struct OpDSetP {
@@ -4698,6 +4717,7 @@ pub enum Op {
     FSwzAdd(OpFSwzAdd),
     DAdd(OpDAdd),
     DFma(OpDFma),
+    DMnMx(OpDMnMx),
     DMul(OpDMul),
     DSetP(OpDSetP),
     Brev(OpBrev),
@@ -5137,7 +5157,11 @@ impl Instr {
             Op::MuFu(_) => false,
 
             // Double-precision float ALU
-            Op::DAdd(_) | Op::DFma(_) | Op::DMul(_) | Op::DSetP(_) => false,
+            Op::DAdd(_)
+            | Op::DFma(_)
+            | Op::DMnMx(_)
+            | Op::DMul(_)
+            | Op::DSetP(_) => false,
 
             // Integer ALU
             Op::Brev(_) | Op::Flo(_) | Op::PopC(_) => false,
diff --git a/src/nouveau/compiler/nak/legalize.rs b/src/nouveau/compiler/nak/legalize.rs
index 4878d7e806b..46721d2802c 100644
--- a/src/nouveau/compiler/nak/legalize.rs
+++ b/src/nouveau/compiler/nak/legalize.rs
@@ -241,6 +241,12 @@ fn legalize_sm50_instr(
             copy_alu_src_if_f20_overflow(b, src1, SrcType::F64);
             copy_alu_src_if_not_reg(b, src2, SrcType::F64);
         }
+        Op::DMnMx(op) => {
+            let [ref mut src0, ref mut src1] = op.srcs;
+            swap_srcs_if_not_reg(src0, src1);
+            copy_alu_src_if_not_reg(b, src0, SrcType::F64);
+            copy_alu_src_if_f20_overflow(b, src1, SrcType::F64);
+        }
         Op::DMul(op) => {
             let [ref mut src0, ref mut src1] = op.srcs;
             copy_alu_src_if_fabs(b, src0, SrcType::F64);