Module: Mesa
Branch: main
Commit: 99bbfbf7b2ecf6c78d9575221b00b6180086014c
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=99bbfbf7b2ecf6c78d9575221b00b6180086014c

Author: Faith Ekstrand <faith.ekstr...@collabora.com>
Date:   Tue Dec  5 20:19:39 2023 -0600

nak: Implement uadd_sat

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26535>

---

 src/nouveau/compiler/nak.rs          |  4 ++--
 src/nouveau/compiler/nak_from_nir.rs | 29 +++++++++++++++++++++++++++++
 2 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/src/nouveau/compiler/nak.rs b/src/nouveau/compiler/nak.rs
index 3f6ed338190..929a4e40d5a 100644
--- a/src/nouveau/compiler/nak.rs
+++ b/src/nouveau/compiler/nak.rs
@@ -102,7 +102,7 @@ pub extern "C" fn nak_should_print_nir() -> bool {
     DEBUG.print()
 }
 
-fn nir_options(_dev: &nv_device_info) -> nir_shader_compiler_options {
+fn nir_options(dev: &nv_device_info) -> nir_shader_compiler_options {
     let mut op: nir_shader_compiler_options = unsafe { std::mem::zeroed() };
 
     op.lower_fdiv = true;
@@ -126,7 +126,7 @@ fn nir_options(_dev: &nv_device_info) -> nir_shader_compiler_options {
     op.lower_cs_local_index_to_id = true;
     op.lower_device_index_to_zero = true;
     op.lower_isign = true;
-    op.lower_uadd_sat = true; // TODO
+    op.lower_uadd_sat = dev.sm < 70;
     op.lower_usub_sat = true; // TODO
     op.lower_iadd_sat = true; // TODO
     op.use_interpolated_input_intrinsics = true;
diff --git a/src/nouveau/compiler/nak_from_nir.rs b/src/nouveau/compiler/nak_from_nir.rs
index a7d3f76a892..01636c5211d 100644
--- a/src/nouveau/compiler/nak_from_nir.rs
+++ b/src/nouveau/compiler/nak_from_nir.rs
@@ -1037,6 +1037,35 @@ impl<'a> ShaderFromNir<'a> {
                 });
                 dst
             }
+            nir_op_uadd_sat => {
+                let x = srcs[0].as_ssa().unwrap();
+                let y = srcs[1].as_ssa().unwrap();
+                let sum_lo = b.alloc_ssa(RegFile::GPR, 1);
+                let ovf_lo = b.alloc_ssa(RegFile::Pred, 1);
+                b.push_op(OpIAdd3 {
+                    dst: sum_lo.into(),
+                    overflow: [ovf_lo.into(), Dst::None],
+                    srcs: [0.into(), x[0].into(), y[0].into()],
+                });
+                if alu.def.bit_size() == 64 {
+                    let sum_hi = b.alloc_ssa(RegFile::GPR, 1);
+                    let ovf_hi = b.alloc_ssa(RegFile::Pred, 1);
+                    b.push_op(OpIAdd3X {
+                        dst: sum_hi.into(),
+                        overflow: [ovf_hi.into(), Dst::None],
+                        srcs: [0.into(), x[1].into(), y[1].into()],
+                        carry: [ovf_lo.into(), false.into()],
+                    });
+                    let lo =
+                        b.sel(ovf_hi.into(), u32::MAX.into(), sum_lo.into());
+                    let hi =
+                        b.sel(ovf_hi.into(), u32::MAX.into(), sum_hi.into());
+                    [lo[0], hi[0]].into()
+                } else {
+                    assert!(alu.def.bit_size() == 32);
+                    b.sel(ovf_lo.into(), u32::MAX.into(), sum_lo.into())
+                }
+            }
             nir_op_unpack_32_2x16_split_x => {
                 b.prmt(srcs[0], 0.into(), [0, 1, 4, 4])
             }

Reply via email to