Module: Mesa
Branch: main
Commit: 09fc5e1c4dc4c2128b692f4f0688b8a69ba456fd
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=09fc5e1c4dc4c2128b692f4f0688b8a69ba456fd
Author: Faith Ekstrand <faith.ekstr...@collabora.com> Date: Tue Dec 5 19:58:20 2023 -0600 nir: Split has_[su]dot_4x8 bits into regular and _sat versions Reviewed-by: Karol Herbst <kher...@redhat.com> Reviewed-by: Jesse Natalie <jenat...@microsoft.com> Reviewed-by: Ian Romanick <ian.d.roman...@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26533> --- src/amd/vulkan/radv_shader.c | 3 +++ src/compiler/nir/nir.h | 15 ++++++++++++--- src/compiler/nir/nir_opt_algebraic.py | 6 +++--- src/freedreno/ir3/ir3_compiler.c | 2 ++ src/gallium/drivers/radeonsi/si_get.c | 3 +++ src/gallium/frontends/rusticl/api/device.rs | 15 +++++++++------ src/gallium/frontends/rusticl/core/device.rs | 12 ++++++++++++ src/intel/compiler/brw_compiler.c | 3 +++ src/microsoft/compiler/nir_to_dxil.c | 2 ++ 9 files changed, 49 insertions(+), 12 deletions(-) diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 05e19dc0aea..2265cca6733 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -105,6 +105,9 @@ get_nir_options_for_stage(struct radv_physical_device *device, gl_shader_stage s .has_sdot_4x8 = device->rad_info.has_accelerated_dot_product, .has_sudot_4x8 = device->rad_info.has_accelerated_dot_product && device->rad_info.gfx_level >= GFX11, .has_udot_4x8 = device->rad_info.has_accelerated_dot_product, + .has_sdot_4x8_sat = device->rad_info.has_accelerated_dot_product, + .has_sudot_4x8_sat = device->rad_info.has_accelerated_dot_product && device->rad_info.gfx_level >= GFX11, + .has_udot_4x8_sat = device->rad_info.has_accelerated_dot_product, .has_dot_2x16 = device->rad_info.has_accelerated_dot_product && device->rad_info.gfx_level < GFX11, .has_find_msb_rev = true, .has_pack_half_2x16_rtz = true, diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index b5eb056184e..1a5a820c7c4 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -3876,15 +3876,24 @@ typedef struct nir_shader_compiler_options 
{ * lowerings. */ bool has_texture_scaling; - /** Backend supports sdot_4x8 opcodes. */ + /** Backend supports sdot_4x8_iadd. */ bool has_sdot_4x8; - /** Backend supports udot_4x8 opcodes. */ + /** Backend supports udot_4x8_uadd. */ bool has_udot_4x8; - /** Backend supports sudot_4x8 opcodes. */ + /** Backend supports sudot_4x8_iadd. */ bool has_sudot_4x8; + /** Backend supports sdot_4x8_iadd_sat. */ + bool has_sdot_4x8_sat; + + /** Backend supports udot_4x8_uadd_sat. */ + bool has_udot_4x8_sat; + + /** Backend supports sudot_4x8_iadd_sat. */ + bool has_sudot_4x8_sat; + /** Backend supports sdot_2x16 and udot_2x16 opcodes. */ bool has_dot_2x16; diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 8e531b1e5b2..d16bdb84e5f 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -321,15 +321,15 @@ optimizations.extend([ # overflowing. 0x100000000 - 0x3f804 = 0xfffc07fc. If c is a constant # that is less than 0xfffc07fc, then the result cannot overflow ever. (('udot_4x8_uadd_sat', a, b, '#c(is_ult_0xfffc07fc)'), ('udot_4x8_uadd', a, b, c)), - (('udot_4x8_uadd_sat', a, b, c), ('uadd_sat', udot_4x8_a_b, c), '!options->has_udot_4x8'), + (('udot_4x8_uadd_sat', a, b, c), ('uadd_sat', udot_4x8_a_b, c), '!options->has_udot_4x8_sat'), # For the signed dot-product, the largest positive value is 4*(-128*-128) = # 0x10000, and the largest negative value is 4*(-128*127) = -0xfe00. We # don't have to worry about that intermediate result overflowing or # underflowing. 
- (('sdot_4x8_iadd_sat', a, b, c), ('iadd_sat', sdot_4x8_a_b, c), '!options->has_sdot_4x8'), + (('sdot_4x8_iadd_sat', a, b, c), ('iadd_sat', sdot_4x8_a_b, c), '!options->has_sdot_4x8_sat'), - (('sudot_4x8_iadd_sat', a, b, c), ('iadd_sat', sudot_4x8_a_b, c), '!options->has_sudot_4x8'), + (('sudot_4x8_iadd_sat', a, b, c), ('iadd_sat', sudot_4x8_a_b, c), '!options->has_sudot_4x8_sat'), (('udot_2x16_uadd_sat', a, b, c), ('uadd_sat', udot_2x16_a_b, c), '!options->has_dot_2x16'), (('sdot_2x16_iadd_sat', a, b, c), ('iadd_sat', sdot_2x16_a_b, c), '!options->has_dot_2x16'), diff --git a/src/freedreno/ir3/ir3_compiler.c b/src/freedreno/ir3/ir3_compiler.c index 53ea182da21..80ca0656df7 100644 --- a/src/freedreno/ir3/ir3_compiler.c +++ b/src/freedreno/ir3/ir3_compiler.c @@ -290,6 +290,8 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id, compiler->nir_options.has_sudot_4x8 = true, compiler->nir_options.has_udot_4x8 = dev_info->a6xx.has_dp2acc; compiler->nir_options.has_sudot_4x8 = dev_info->a6xx.has_dp2acc; + compiler->nir_options.has_udot_4x8_sat = dev_info->a6xx.has_dp2acc; + compiler->nir_options.has_sudot_4x8_sat = dev_info->a6xx.has_dp2acc; } else if (compiler->gen >= 3 && compiler->gen <= 5) { compiler->nir_options.vertex_id_zero_based = true; } else if (compiler->gen <= 2) { diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c index b85fc7c6e88..59b17adac99 100644 --- a/src/gallium/drivers/radeonsi/si_get.c +++ b/src/gallium/drivers/radeonsi/si_get.c @@ -1424,6 +1424,9 @@ void si_init_screen_get_functions(struct si_screen *sscreen) .has_sdot_4x8 = sscreen->info.has_accelerated_dot_product, .has_sudot_4x8 = sscreen->info.has_accelerated_dot_product && sscreen->info.gfx_level >= GFX11, .has_udot_4x8 = sscreen->info.has_accelerated_dot_product, + .has_sdot_4x8_sat = sscreen->info.has_accelerated_dot_product, + .has_sudot_4x8_sat = sscreen->info.has_accelerated_dot_product && sscreen->info.gfx_level >= GFX11, + 
.has_udot_4x8_sat = sscreen->info.has_accelerated_dot_product, .has_dot_2x16 = sscreen->info.has_accelerated_dot_product && sscreen->info.gfx_level < GFX11, .has_bfe = true, .has_bfm = true, diff --git a/src/gallium/frontends/rusticl/api/device.rs b/src/gallium/frontends/rusticl/api/device.rs index 43fee832c22..fbf1b4ea7e1 100644 --- a/src/gallium/frontends/rusticl/api/device.rs +++ b/src/gallium/frontends/rusticl/api/device.rs @@ -127,13 +127,16 @@ impl CLInfo<cl_device_info> for cl_device_id { let sdot = dev.sdot_4x8_supported() && pack; let udot = dev.udot_4x8_supported() && pack; let sudot = dev.sudot_4x8_supported() && pack; + let sdot_sat = dev.sdot_4x8_sat_supported() && pack; + let udot_sat = dev.udot_4x8_sat_supported() && pack; + let sudot_sat = dev.sudot_4x8_sat_supported() && pack; IdpAccelProps::new( sdot.into(), udot.into(), sudot.into(), - sdot.into(), - udot.into(), - sudot.into(), + sdot_sat.into(), + udot_sat.into(), + sudot_sat.into(), ) }) } @@ -143,9 +146,9 @@ impl CLInfo<cl_device_info> for cl_device_id { dev.sdot_4x8_supported().into(), dev.udot_4x8_supported().into(), dev.sudot_4x8_supported().into(), - dev.sdot_4x8_supported().into(), - dev.udot_4x8_supported().into(), - dev.sudot_4x8_supported().into(), + dev.sdot_4x8_sat_supported().into(), + dev.udot_4x8_sat_supported().into(), + dev.sudot_4x8_sat_supported().into(), ) }) } diff --git a/src/gallium/frontends/rusticl/core/device.rs b/src/gallium/frontends/rusticl/core/device.rs index 97f06c67b31..35b2331368d 100644 --- a/src/gallium/frontends/rusticl/core/device.rs +++ b/src/gallium/frontends/rusticl/core/device.rs @@ -763,6 +763,18 @@ impl Device { self.get_nir_options().has_pack_32_4x8 } + pub fn sdot_4x8_sat_supported(&self) -> bool { + self.get_nir_options().has_sdot_4x8_sat + } + + pub fn udot_4x8_sat_supported(&self) -> bool { + self.get_nir_options().has_udot_4x8_sat + } + + pub fn sudot_4x8_sat_supported(&self) -> bool { + self.get_nir_options().has_sudot_4x8_sat + } + pub fn 
fp64_is_softfp(&self) -> bool { bit_check( self.get_nir_options().lower_doubles_options as u32, diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c index 4fbcf5ad880..10e9071a544 100644 --- a/src/intel/compiler/brw_compiler.c +++ b/src/intel/compiler/brw_compiler.c @@ -198,6 +198,9 @@ brw_compiler_create(void *mem_ctx, const struct intel_device_info *devinfo) nir_options->has_sdot_4x8 = devinfo->ver >= 12; nir_options->has_udot_4x8 = devinfo->ver >= 12; nir_options->has_sudot_4x8 = devinfo->ver >= 12; + nir_options->has_sdot_4x8_sat = devinfo->ver >= 12; + nir_options->has_udot_4x8_sat = devinfo->ver >= 12; + nir_options->has_sudot_4x8_sat = devinfo->ver >= 12; nir_options->lower_int64_options = int64_options; nir_options->lower_doubles_options = fp64_options; diff --git a/src/microsoft/compiler/nir_to_dxil.c b/src/microsoft/compiler/nir_to_dxil.c index 49554168788..17b04336a02 100644 --- a/src/microsoft/compiler/nir_to_dxil.c +++ b/src/microsoft/compiler/nir_to_dxil.c @@ -184,6 +184,8 @@ dxil_get_nir_compiler_options(nir_shader_compiler_options *options, if (shader_model_max >= SHADER_MODEL_6_4) { options->has_sdot_4x8 = true; options->has_udot_4x8 = true; + options->has_sdot_4x8_sat = true; + options->has_udot_4x8_sat = true; } }