Module: Mesa
Branch: main
Commit: d5c310899a72bfa400750ae37169fc6bf3e4f83b
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=d5c310899a72bfa400750ae37169fc6bf3e4f83b

Author: Faith Ekstrand <[email protected]>
Date:   Tue Oct 24 13:40:47 2023 -0500

nir: Split nir_lower_subgroup_options::lower_vote_eq into two bits

On NVIDIA, we can do a vote_ieq on bool in one hardware op so we don't
want that lowered.  We do want to lower vote_feq and other vote_ieq,
though.

Reviewed-by: Daniel Schürmann <[email protected]>
Reviewed-by: Timur Kristóf <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25894>

---

 src/amd/vulkan/radv_shader.c                 | 1 +
 src/compiler/nir/nir.h                       | 1 +
 src/compiler/nir/nir_lower_subgroups.c       | 9 +++++++--
 src/freedreno/ir3/ir3_nir.c                  | 1 +
 src/gallium/drivers/radeonsi/si_shader_nir.c | 1 +
 5 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 62625274578..19944d3d63e 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -644,6 +644,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
                .lower_relative_shuffle = 1,
                .lower_shuffle_to_32bit = 1,
                .lower_vote_eq = 1,
+               .lower_vote_bool_eq = 1,
                .lower_quad_broadcast_dynamic = 1,
                .lower_quad_broadcast_dynamic_to_const = gfx7minus,
                .lower_shuffle_to_swizzle_amd = 1,
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 4839ef0b0ba..365c47893fa 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -5527,6 +5527,7 @@ typedef struct nir_lower_subgroups_options {
    bool lower_to_scalar : 1;
    bool lower_vote_trivial : 1;
    bool lower_vote_eq : 1;
+   bool lower_vote_bool_eq : 1;
    bool lower_first_invocation_to_ballot : 1;
    bool lower_read_first_invocation : 1;
    bool lower_subgroup_masks : 1;
diff --git a/src/compiler/nir/nir_lower_subgroups.c b/src/compiler/nir/nir_lower_subgroups.c
index 8c53d7fb6b8..0e2c6a236bc 100644
--- a/src/compiler/nir/nir_lower_subgroups.c
+++ b/src/compiler/nir/nir_lower_subgroups.c
@@ -589,8 +589,13 @@ lower_subgroups_instr(nir_builder *b, nir_instr *instr, void *_options)
       if (options->lower_vote_trivial)
          return nir_imm_true(b);
 
-      if (options->lower_vote_eq)
-         return lower_vote_eq(b, intrin);
+      if (nir_src_bit_size(intrin->src[0]) == 1) {
+         if (options->lower_vote_bool_eq)
+            return lower_vote_eq(b, intrin);
+      } else {
+         if (options->lower_vote_eq)
+            return lower_vote_eq(b, intrin);
+      }
 
       if (options->lower_to_scalar && intrin->num_components > 1)
          return lower_vote_eq_to_scalar(b, intrin);
diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
index 21b3576ec7b..07b212f4cc9 100644
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -560,6 +560,7 @@ ir3_nir_post_finalize(struct ir3_shader *shader)
             .ballot_components = max_subgroup_size / 32,
             .lower_to_scalar = true,
             .lower_vote_eq = true,
+            .lower_vote_bool_eq = true,
             .lower_subgroup_masks = true,
             .lower_read_invocation_to_cond = true,
             .lower_shuffle = true,
diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c
index fce4a05d034..59945538d91 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -254,6 +254,7 @@ const nir_lower_subgroups_options si_nir_subgroups_options = {
    .lower_subgroup_masks = true,
    .lower_vote_trivial = false,
    .lower_vote_eq = true,
+   .lower_vote_bool_eq = true,
    .lower_inverse_ballot = true,
 };
 

Reply via email to