Module: Mesa
Branch: main
Commit: 1c84c8183c5b0007aed5b389271cb1869a44acaf
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1c84c8183c5b0007aed5b389271cb1869a44acaf
Author: Faith Ekstrand <faith.ekstr...@collabora.com> Date: Wed Dec 6 17:41:22 2023 -0600 nak: Plumb through float controls Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26557> --- src/nouveau/compiler/meson.build | 1 + src/nouveau/compiler/nak/from_nir.rs | 147 +++++++++++++++++++++++++++++------ 2 files changed, 126 insertions(+), 22 deletions(-) diff --git a/src/nouveau/compiler/meson.build b/src/nouveau/compiler/meson.build index ade1e0942d2..3ce3372c4af 100644 --- a/src/nouveau/compiler/meson.build +++ b/src/nouveau/compiler/meson.build @@ -64,6 +64,7 @@ nak_bindings_rs = rust.bindgen( '--raw-line', '#![allow(non_upper_case_globals)]', '--allowlist-type', 'exec_list', '--allowlist-type', 'exec_node', + '--allowlist-type', 'float_controls', '--allowlist-type', 'gl_access_qualifier', '--allowlist-type', 'gl_frag_result', '--allowlist-type', 'gl_interp_mode', diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs index 2e9add9e04f..2d3d9cda7e1 100644 --- a/src/nouveau/compiler/nak/from_nir.rs +++ b/src/nouveau/compiler/nak/from_nir.rs @@ -14,6 +14,7 @@ use nak_bindings::*; use std::cmp::max; use std::collections::{HashMap, HashSet}; +use std::ops::Index; fn init_info_from_nir(nir: &nir_shader, sm: u8) -> ShaderInfo { ShaderInfo { @@ -144,9 +145,94 @@ impl<'a> PhiAllocMap<'a> { } } +struct PerSizeFloatControls { + pub ftz: bool, + pub rnd_mode: FRndMode, +} + +struct ShaderFloatControls { + pub fp16: PerSizeFloatControls, + pub fp32: PerSizeFloatControls, + pub fp64: PerSizeFloatControls, +} + +impl Default for ShaderFloatControls { + fn default() -> Self { + Self { + fp16: PerSizeFloatControls { + ftz: false, + rnd_mode: FRndMode::NearestEven, + }, + fp32: PerSizeFloatControls { + ftz: false, + rnd_mode: FRndMode::NearestEven, + }, + fp64: PerSizeFloatControls { + ftz: false, + rnd_mode: FRndMode::NearestEven, + }, + } + } +} + +impl ShaderFloatControls { + fn from_nir(nir: &nir_shader) -> 
ShaderFloatControls { + let nir_fc = nir.info.float_controls_execution_mode; + let mut fc: ShaderFloatControls = Default::default(); + + if (nir_fc & FLOAT_CONTROLS_DENORM_PRESERVE_FP16) != 0 { + fc.fp16.ftz = false; + } else if (nir_fc & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16) != 0 { + fc.fp16.ftz = true; + } + if (nir_fc & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16) != 0 { + fc.fp16.rnd_mode = FRndMode::NearestEven; + } else if (nir_fc & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16) != 0 { + fc.fp16.rnd_mode = FRndMode::Zero; + } + + if (nir_fc & FLOAT_CONTROLS_DENORM_PRESERVE_FP32) != 0 { + fc.fp32.ftz = false; + } else if (nir_fc & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32) != 0 { + fc.fp32.ftz = true; + } + if (nir_fc & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32) != 0 { + fc.fp32.rnd_mode = FRndMode::NearestEven; + } else if (nir_fc & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32) != 0 { + fc.fp32.rnd_mode = FRndMode::Zero; + } + + if (nir_fc & FLOAT_CONTROLS_DENORM_PRESERVE_FP64) != 0 { + fc.fp64.ftz = false; + } else if (nir_fc & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP64) != 0 { + fc.fp64.ftz = true; + } + if (nir_fc & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64) != 0 { + fc.fp64.rnd_mode = FRndMode::NearestEven; + } else if (nir_fc & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64) != 0 { + fc.fp64.rnd_mode = FRndMode::Zero; + } + + fc + } +} + +impl Index<FloatType> for ShaderFloatControls { + type Output = PerSizeFloatControls; + + fn index(&self, idx: FloatType) -> &PerSizeFloatControls { + match idx { + FloatType::F16 => &self.fp16, + FloatType::F32 => &self.fp32, + FloatType::F64 => &self.fp64, + } + } +} + struct ShaderFromNir<'a> { nir: &'a nir_shader, info: ShaderInfo, + float_ctl: ShaderFloatControls, cfg: CFGBuilder<u32, BasicBlock>, label_alloc: LabelAllocator, block_label: HashMap<u32, Label>, @@ -162,6 +248,7 @@ impl<'a> ShaderFromNir<'a> { Self { nir: nir, info: init_info_from_nir(nir, sm), + float_ctl: ShaderFloatControls::from_nir(nir), cfg: CFGBuilder::new(), label_alloc: 
LabelAllocator::new(), block_label: HashMap::new(), @@ -489,18 +576,25 @@ impl<'a> ShaderFromNir<'a> { | nir_op_f2f32 | nir_op_f2f64 => { let src_bits = alu.get_src(0).src.bit_size(); let dst_bits = alu.def.bit_size(); + let src_type = FloatType::from_bits(src_bits.into()); + let dst_type = FloatType::from_bits(dst_bits.into()); let dst = b.alloc_ssa(RegFile::GPR, 1); b.push_op(OpF2F { dst: dst.into(), src: srcs[0], src_type: FloatType::from_bits(src_bits.into()), - dst_type: FloatType::from_bits(dst_bits.into()), + dst_type: dst_type, rnd_mode: match alu.op { + nir_op_f2f16_rtne => FRndMode::NearestEven, nir_op_f2f16_rtz => FRndMode::Zero, - _ => FRndMode::NearestEven, + _ => self.float_ctl[dst_type].rnd_mode, + }, + ftz: if src_bits < dst_bits { + self.float_ctl[src_type].ftz + } else { + self.float_ctl[dst_type].ftz }, - ftz: true, high: false, }); dst @@ -524,18 +618,19 @@ impl<'a> ShaderFromNir<'a> { | nir_op_f2u8 | nir_op_f2u16 | nir_op_f2u32 | nir_op_f2u64 => { let src_bits = usize::from(alu.get_src(0).bit_size()); let dst_bits = alu.def.bit_size(); + let src_type = FloatType::from_bits(src_bits); let dst = b.alloc_ssa(RegFile::GPR, dst_bits.div_ceil(32)); let dst_is_signed = alu.info().output_type & 2 != 0; b.push_op(OpF2I { dst: dst.into(), src: srcs[0], - src_type: FloatType::from_bits(src_bits), + src_type: src_type, dst_type: IntType::from_bits( dst_bits.into(), dst_is_signed, ), rnd_mode: FRndMode::Zero, - ftz: false, + ftz: self.float_ctl[src_type].ftz, }); dst } @@ -546,6 +641,7 @@ impl<'a> ShaderFromNir<'a> { nir_op_fneg => (Src::new_zero().fneg(), srcs[0].fneg()), _ => panic!("Unhandled case"), }; + let ftype = FloatType::from_bits(alu.def.bit_size().into()); assert!(alu.def.bit_size() == 32); let dst = b.alloc_ssa(RegFile::GPR, 1); let saturate = self.try_saturate_alu_dst(&alu.def); @@ -553,8 +649,8 @@ impl<'a> ShaderFromNir<'a> { dst: dst.into(), srcs: [x, y], saturate: saturate, - rnd_mode: FRndMode::NearestEven, - ftz: false, + rnd_mode: 
self.float_ctl[ftype].rnd_mode, + ftz: self.float_ctl[ftype].ftz, }); dst } @@ -586,14 +682,15 @@ impl<'a> ShaderFromNir<'a> { nir_op_feq => b.fsetp(FloatCmpOp::OrdEq, srcs[0], srcs[1]), nir_op_fexp2 => b.mufu(MuFuOp::Exp2, srcs[0]), nir_op_ffma => { + let ftype = FloatType::from_bits(alu.def.bit_size().into()); assert!(alu.def.bit_size() == 32); let dst = b.alloc_ssa(RegFile::GPR, 1); let ffma = OpFFma { dst: dst.into(), srcs: [srcs[0], srcs[1], srcs[2]], saturate: self.try_saturate_alu_dst(&alu.def), - rnd_mode: FRndMode::NearestEven, - ftz: false, + rnd_mode: self.float_ctl[ftype].rnd_mode, + ftz: self.float_ctl[ftype].ftz, }; b.push_op(ffma); dst @@ -617,19 +714,20 @@ impl<'a> ShaderFromNir<'a> { dst: dst.into(), srcs: [srcs[0], srcs[1]], min: (alu.op == nir_op_fmin).into(), - ftz: false, + ftz: self.float_ctl.fp32.ftz, }); dst } nir_op_fmul => { + let ftype = FloatType::from_bits(alu.def.bit_size().into()); assert!(alu.def.bit_size() == 32); let dst = b.alloc_ssa(RegFile::GPR, 1); let fmul = OpFMul { dst: dst.into(), srcs: [srcs[0], srcs[1]], saturate: self.try_saturate_alu_dst(&alu.def), - rnd_mode: FRndMode::NearestEven, - ftz: false, + rnd_mode: self.float_ctl[ftype].rnd_mode, + ftz: self.float_ctl[ftype].ftz, }; b.push_op(fmul); dst @@ -672,13 +770,14 @@ impl<'a> ShaderFromNir<'a> { if self.alu_src_is_saturated(&alu.srcs_as_slice()[0]) { b.copy(srcs[0]) } else { + let ftype = FloatType::from_bits(alu.def.bit_size().into()); let dst = b.alloc_ssa(RegFile::GPR, 1); b.push_op(OpFAdd { dst: dst.into(), srcs: [srcs[0], 0.into()], saturate: true, - rnd_mode: FRndMode::NearestEven, - ftz: false, + rnd_mode: self.float_ctl[ftype].rnd_mode, + ftz: self.float_ctl[ftype].ftz, }); dst } @@ -698,13 +797,14 @@ impl<'a> ShaderFromNir<'a> { nir_op_i2f16 | nir_op_i2f32 | nir_op_i2f64 => { let src_bits = alu.get_src(0).src.bit_size(); let dst_bits = alu.def.bit_size(); + let dst_type = FloatType::from_bits(dst_bits.into()); let dst = b.alloc_ssa(RegFile::GPR, 
dst_bits.div_ceil(32)); b.push_op(OpI2F { dst: dst.into(), src: srcs[0], - dst_type: FloatType::from_bits(dst_bits.into()), + dst_type: dst_type, src_type: IntType::from_bits(src_bits.into(), true), - rnd_mode: FRndMode::NearestEven, + rnd_mode: self.float_ctl[dst_type].rnd_mode, }); dst } @@ -996,13 +1096,14 @@ impl<'a> ShaderFromNir<'a> { nir_op_u2f16 | nir_op_u2f32 | nir_op_u2f64 => { let src_bits = alu.get_src(0).src.bit_size(); let dst_bits = alu.def.bit_size(); + let dst_type = FloatType::from_bits(dst_bits.into()); let dst = b.alloc_ssa(RegFile::GPR, dst_bits.div_ceil(32)); b.push_op(OpI2F { dst: dst.into(), src: srcs[0], - dst_type: FloatType::from_bits(dst_bits.into()), + dst_type: dst_type, src_type: IntType::from_bits(src_bits.into(), false), - rnd_mode: FRndMode::NearestEven, + rnd_mode: self.float_ctl[dst_type].rnd_mode, }); dst } @@ -1137,6 +1238,7 @@ impl<'a> ShaderFromNir<'a> { // TODO: Real coarse derivatives assert!(alu.def.bit_size() == 32); + let ftype = FloatType::F32; let scratch = b.alloc_ssa(RegFile::GPR, 1); b.push_op(OpShfl { @@ -1159,8 +1261,8 @@ impl<'a> ShaderFromNir<'a> { FSwzAddOp::SubLeft, FSwzAddOp::SubRight, ], - rnd_mode: FRndMode::NearestEven, - ftz: false, + rnd_mode: self.float_ctl[ftype].rnd_mode, + ftz: self.float_ctl[ftype].ftz, }); dst @@ -1169,6 +1271,7 @@ impl<'a> ShaderFromNir<'a> { // TODO: Real coarse derivatives assert!(alu.def.bit_size() == 32); + let ftype = FloatType::F32; let scratch = b.alloc_ssa(RegFile::GPR, 1); b.push_op(OpShfl { @@ -1191,8 +1294,8 @@ impl<'a> ShaderFromNir<'a> { FSwzAddOp::SubRight, FSwzAddOp::SubRight, ], - rnd_mode: FRndMode::NearestEven, - ftz: false, + rnd_mode: self.float_ctl[ftype].rnd_mode, + ftz: self.float_ctl[ftype].ftz, }); dst