Module: Mesa
Branch: main
Commit: 09fc5e1c4dc4c2128b692f4f0688b8a69ba456fd
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=09fc5e1c4dc4c2128b692f4f0688b8a69ba456fd

Author: Faith Ekstrand <faith.ekstr...@collabora.com>
Date:   Tue Dec  5 19:58:20 2023 -0600

nir: Split has_[su]dot_4x8 bits into regular and _sat versions

Reviewed-by: Karol Herbst <kher...@redhat.com>
Reviewed-by: Jesse Natalie <jenat...@microsoft.com>
Reviewed-by: Ian Romanick <ian.d.roman...@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26533>

---

 src/amd/vulkan/radv_shader.c                 |  3 +++
 src/compiler/nir/nir.h                       | 15 ++++++++++++---
 src/compiler/nir/nir_opt_algebraic.py        |  6 +++---
 src/freedreno/ir3/ir3_compiler.c             |  2 ++
 src/gallium/drivers/radeonsi/si_get.c        |  3 +++
 src/gallium/frontends/rusticl/api/device.rs  | 15 +++++++++------
 src/gallium/frontends/rusticl/core/device.rs | 12 ++++++++++++
 src/intel/compiler/brw_compiler.c            |  3 +++
 src/microsoft/compiler/nir_to_dxil.c         |  2 ++
 9 files changed, 49 insertions(+), 12 deletions(-)

diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 05e19dc0aea..2265cca6733 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -105,6 +105,9 @@ get_nir_options_for_stage(struct radv_physical_device *device, gl_shader_stage s
       .has_sdot_4x8 = device->rad_info.has_accelerated_dot_product,
       .has_sudot_4x8 = device->rad_info.has_accelerated_dot_product && device->rad_info.gfx_level >= GFX11,
       .has_udot_4x8 = device->rad_info.has_accelerated_dot_product,
+      .has_sdot_4x8_sat = device->rad_info.has_accelerated_dot_product,
+      .has_sudot_4x8_sat = device->rad_info.has_accelerated_dot_product && device->rad_info.gfx_level >= GFX11,
+      .has_udot_4x8_sat = device->rad_info.has_accelerated_dot_product,
       .has_dot_2x16 = device->rad_info.has_accelerated_dot_product && device->rad_info.gfx_level < GFX11,
       .has_find_msb_rev = true,
       .has_pack_half_2x16_rtz = true,
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index b5eb056184e..1a5a820c7c4 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -3876,15 +3876,24 @@ typedef struct nir_shader_compiler_options {
     * lowerings. */
    bool has_texture_scaling;
 
-   /** Backend supports sdot_4x8 opcodes. */
+   /** Backend supports sdot_4x8_iadd. */
    bool has_sdot_4x8;
 
-   /** Backend supports udot_4x8 opcodes. */
+   /** Backend supports udot_4x8_uadd. */
    bool has_udot_4x8;
 
-   /** Backend supports sudot_4x8 opcodes. */
+   /** Backend supports sudot_4x8_iadd. */
    bool has_sudot_4x8;
 
+   /** Backend supports sdot_4x8_iadd_sat. */
+   bool has_sdot_4x8_sat;
+
+   /** Backend supports udot_4x8_uadd_sat. */
+   bool has_udot_4x8_sat;
+
+   /** Backend supports sudot_4x8_iadd_sat. */
+   bool has_sudot_4x8_sat;
+
    /** Backend supports sdot_2x16 and udot_2x16 opcodes. */
    bool has_dot_2x16;
 
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 8e531b1e5b2..d16bdb84e5f 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -321,15 +321,15 @@ optimizations.extend([
    # overflowing.  0x100000000 - 0x3f804 = 0xfffc07fc.  If c is a constant
    # that is less than 0xfffc07fc, then the result cannot overflow ever.
    (('udot_4x8_uadd_sat', a, b, '#c(is_ult_0xfffc07fc)'), ('udot_4x8_uadd', a, b, c)),
-   (('udot_4x8_uadd_sat', a, b, c), ('uadd_sat', udot_4x8_a_b, c), '!options->has_udot_4x8'),
+   (('udot_4x8_uadd_sat', a, b, c), ('uadd_sat', udot_4x8_a_b, c), '!options->has_udot_4x8_sat'),
 
    # For the signed dot-product, the largest positive value is 4*(-128*-128) =
    # 0x10000, and the largest negative value is 4*(-128*127) = -0xfe00.  We
    # don't have to worry about that intermediate result overflowing or
    # underflowing.
-   (('sdot_4x8_iadd_sat', a, b, c), ('iadd_sat', sdot_4x8_a_b, c), '!options->has_sdot_4x8'),
+   (('sdot_4x8_iadd_sat', a, b, c), ('iadd_sat', sdot_4x8_a_b, c), '!options->has_sdot_4x8_sat'),
 
-   (('sudot_4x8_iadd_sat', a, b, c), ('iadd_sat', sudot_4x8_a_b, c), '!options->has_sudot_4x8'),
+   (('sudot_4x8_iadd_sat', a, b, c), ('iadd_sat', sudot_4x8_a_b, c), '!options->has_sudot_4x8_sat'),
 
    (('udot_2x16_uadd_sat', a, b, c), ('uadd_sat', udot_2x16_a_b, c), '!options->has_dot_2x16'),
    (('sdot_2x16_iadd_sat', a, b, c), ('iadd_sat', sdot_2x16_a_b, c), '!options->has_dot_2x16'),
diff --git a/src/freedreno/ir3/ir3_compiler.c b/src/freedreno/ir3/ir3_compiler.c
index 53ea182da21..80ca0656df7 100644
--- a/src/freedreno/ir3/ir3_compiler.c
+++ b/src/freedreno/ir3/ir3_compiler.c
@@ -290,6 +290,8 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id,
       compiler->nir_options.has_sudot_4x8 = true,
       compiler->nir_options.has_udot_4x8 = dev_info->a6xx.has_dp2acc;
       compiler->nir_options.has_sudot_4x8 = dev_info->a6xx.has_dp2acc;
+      compiler->nir_options.has_udot_4x8_sat = dev_info->a6xx.has_dp2acc;
+      compiler->nir_options.has_sudot_4x8_sat = dev_info->a6xx.has_dp2acc;
    } else if (compiler->gen >= 3 && compiler->gen <= 5) {
       compiler->nir_options.vertex_id_zero_based = true;
    } else if (compiler->gen <= 2) {
diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c
index b85fc7c6e88..59b17adac99 100644
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c
@@ -1424,6 +1424,9 @@ void si_init_screen_get_functions(struct si_screen *sscreen)
       .has_sdot_4x8 = sscreen->info.has_accelerated_dot_product,
       .has_sudot_4x8 = sscreen->info.has_accelerated_dot_product && sscreen->info.gfx_level >= GFX11,
       .has_udot_4x8 = sscreen->info.has_accelerated_dot_product,
+      .has_sdot_4x8_sat = sscreen->info.has_accelerated_dot_product,
+      .has_sudot_4x8_sat = sscreen->info.has_accelerated_dot_product && sscreen->info.gfx_level >= GFX11,
+      .has_udot_4x8_sat = sscreen->info.has_accelerated_dot_product,
       .has_dot_2x16 = sscreen->info.has_accelerated_dot_product && sscreen->info.gfx_level < GFX11,
       .has_bfe = true,
       .has_bfm = true,
diff --git a/src/gallium/frontends/rusticl/api/device.rs b/src/gallium/frontends/rusticl/api/device.rs
index 43fee832c22..fbf1b4ea7e1 100644
--- a/src/gallium/frontends/rusticl/api/device.rs
+++ b/src/gallium/frontends/rusticl/api/device.rs
@@ -127,13 +127,16 @@ impl CLInfo<cl_device_info> for cl_device_id {
                     let sdot = dev.sdot_4x8_supported() && pack;
                     let udot = dev.udot_4x8_supported() && pack;
                     let sudot = dev.sudot_4x8_supported() && pack;
+                    let sdot_sat = dev.sdot_4x8_sat_supported() && pack;
+                    let udot_sat = dev.udot_4x8_sat_supported() && pack;
+                    let sudot_sat = dev.sudot_4x8_sat_supported() && pack;
                     IdpAccelProps::new(
                         sdot.into(),
                         udot.into(),
                         sudot.into(),
-                        sdot.into(),
-                        udot.into(),
-                        sudot.into(),
+                        sdot_sat.into(),
+                        udot_sat.into(),
+                        sudot_sat.into(),
                     )
                 })
             }
@@ -143,9 +146,9 @@ impl CLInfo<cl_device_info> for cl_device_id {
                         dev.sdot_4x8_supported().into(),
                         dev.udot_4x8_supported().into(),
                         dev.sudot_4x8_supported().into(),
-                        dev.sdot_4x8_supported().into(),
-                        dev.udot_4x8_supported().into(),
-                        dev.sudot_4x8_supported().into(),
+                        dev.sdot_4x8_sat_supported().into(),
+                        dev.udot_4x8_sat_supported().into(),
+                        dev.sudot_4x8_sat_supported().into(),
                     )
                 })
             }
diff --git a/src/gallium/frontends/rusticl/core/device.rs b/src/gallium/frontends/rusticl/core/device.rs
index 97f06c67b31..35b2331368d 100644
--- a/src/gallium/frontends/rusticl/core/device.rs
+++ b/src/gallium/frontends/rusticl/core/device.rs
@@ -763,6 +763,18 @@ impl Device {
         self.get_nir_options().has_pack_32_4x8
     }
 
+    pub fn sdot_4x8_sat_supported(&self) -> bool {
+        self.get_nir_options().has_sdot_4x8_sat
+    }
+
+    pub fn udot_4x8_sat_supported(&self) -> bool {
+        self.get_nir_options().has_udot_4x8_sat
+    }
+
+    pub fn sudot_4x8_sat_supported(&self) -> bool {
+        self.get_nir_options().has_sudot_4x8_sat
+    }
+
     pub fn fp64_is_softfp(&self) -> bool {
         bit_check(
             self.get_nir_options().lower_doubles_options as u32,
diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c
index 4fbcf5ad880..10e9071a544 100644
--- a/src/intel/compiler/brw_compiler.c
+++ b/src/intel/compiler/brw_compiler.c
@@ -198,6 +198,9 @@ brw_compiler_create(void *mem_ctx, const struct intel_device_info *devinfo)
       nir_options->has_sdot_4x8 = devinfo->ver >= 12;
       nir_options->has_udot_4x8 = devinfo->ver >= 12;
       nir_options->has_sudot_4x8 = devinfo->ver >= 12;
+      nir_options->has_sdot_4x8_sat = devinfo->ver >= 12;
+      nir_options->has_udot_4x8_sat = devinfo->ver >= 12;
+      nir_options->has_sudot_4x8_sat = devinfo->ver >= 12;
 
       nir_options->lower_int64_options = int64_options;
       nir_options->lower_doubles_options = fp64_options;
diff --git a/src/microsoft/compiler/nir_to_dxil.c b/src/microsoft/compiler/nir_to_dxil.c
index 49554168788..17b04336a02 100644
--- a/src/microsoft/compiler/nir_to_dxil.c
+++ b/src/microsoft/compiler/nir_to_dxil.c
@@ -184,6 +184,8 @@ dxil_get_nir_compiler_options(nir_shader_compiler_options *options,
    if (shader_model_max >= SHADER_MODEL_6_4) {
       options->has_sdot_4x8 = true;
       options->has_udot_4x8 = true;
+      options->has_sdot_4x8_sat = true;
+      options->has_udot_4x8_sat = true;
    }
 }
 

Reply via email to