Module: Mesa
Branch: main
Commit: 99bbfbf7b2ecf6c78d9575221b00b6180086014c
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=99bbfbf7b2ecf6c78d9575221b00b6180086014c

Author: Faith Ekstrand <faith.ekstr...@collabora.com>
Date: Tue Dec 5 20:19:39 2023 -0600

nak: Implement uadd_sat

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26535>

---

 src/nouveau/compiler/nak.rs          |  4 ++--
 src/nouveau/compiler/nak_from_nir.rs | 29 +++++++++++++++++++++++++++++
 2 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/src/nouveau/compiler/nak.rs b/src/nouveau/compiler/nak.rs
index 3f6ed338190..929a4e40d5a 100644
--- a/src/nouveau/compiler/nak.rs
+++ b/src/nouveau/compiler/nak.rs
@@ -102,7 +102,7 @@ pub extern "C" fn nak_should_print_nir() -> bool {
     DEBUG.print()
 }
 
-fn nir_options(_dev: &nv_device_info) -> nir_shader_compiler_options {
+fn nir_options(dev: &nv_device_info) -> nir_shader_compiler_options {
     let mut op: nir_shader_compiler_options = unsafe { std::mem::zeroed() };
 
     op.lower_fdiv = true;
@@ -126,7 +126,7 @@ fn nir_options(_dev: &nv_device_info) -> nir_shader_compiler_options {
     op.lower_cs_local_index_to_id = true;
     op.lower_device_index_to_zero = true;
     op.lower_isign = true;
-    op.lower_uadd_sat = true; // TODO
+    op.lower_uadd_sat = dev.sm < 70;
     op.lower_usub_sat = true; // TODO
     op.lower_iadd_sat = true; // TODO
     op.use_interpolated_input_intrinsics = true;
diff --git a/src/nouveau/compiler/nak_from_nir.rs b/src/nouveau/compiler/nak_from_nir.rs
index a7d3f76a892..01636c5211d 100644
--- a/src/nouveau/compiler/nak_from_nir.rs
+++ b/src/nouveau/compiler/nak_from_nir.rs
@@ -1037,6 +1037,35 @@ impl<'a> ShaderFromNir<'a> {
                 });
                 dst
             }
+            nir_op_uadd_sat => {
+                let x = srcs[0].as_ssa().unwrap();
+                let y = srcs[1].as_ssa().unwrap();
+                let sum_lo = b.alloc_ssa(RegFile::GPR, 1);
+                let ovf_lo = b.alloc_ssa(RegFile::Pred, 1);
+                b.push_op(OpIAdd3 {
+                    dst: sum_lo.into(),
+                    overflow: [ovf_lo.into(), Dst::None],
+                    srcs: [0.into(), x[0].into(), y[0].into()],
+                });
+                if alu.def.bit_size() == 64 {
+                    let sum_hi = b.alloc_ssa(RegFile::GPR, 1);
+                    let ovf_hi = b.alloc_ssa(RegFile::Pred, 1);
+                    b.push_op(OpIAdd3X {
+                        dst: sum_hi.into(),
+                        overflow: [ovf_hi.into(), Dst::None],
+                        srcs: [0.into(), x[1].into(), y[1].into()],
+                        carry: [ovf_lo.into(), false.into()],
+                    });
+                    let lo =
+                        b.sel(ovf_hi.into(), u32::MAX.into(), sum_lo.into());
+                    let hi =
+                        b.sel(ovf_hi.into(), u32::MAX.into(), sum_hi.into());
+                    [lo[0], hi[0]].into()
+                } else {
+                    assert!(alu.def.bit_size() == 32);
+                    b.sel(ovf_lo.into(), u32::MAX.into(), sum_lo.into())
+                }
+            }
             nir_op_unpack_32_2x16_split_x => {
                 b.prmt(srcs[0], 0.into(), [0, 1, 4, 4])
             }