Module: Mesa
Branch: main
Commit: ba6595e41f48a1bdb7a5d60075e885c681fc6e7f
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=ba6595e41f48a1bdb7a5d60075e885c681fc6e7f

Author: Marek Olšák <marek.ol...@amd.com>
Date:   Tue Oct 24 07:46:22 2023 -0400

radeonsi: don't call nir_lower_compute_system_values too many times

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-pra...@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26095>

---

 src/gallium/drivers/radeonsi/si_shader_nir.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c 
b/src/gallium/drivers/radeonsi/si_shader_nir.c
index 59945538d91..142a1b408b9 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -301,7 +301,6 @@ static void si_lower_nir(struct si_screen *sscreen, struct 
nir_shader *nir)
    NIR_PASS_V(nir, nir_lower_var_copies);
    NIR_PASS_V(nir, nir_opt_intrinsics);
    NIR_PASS_V(nir, nir_lower_system_values);
-   NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);
 
    /* si_nir_kill_outputs and ac_nir_optimize_outputs require outputs to be 
scalar. */
    if (nir->info.stage == MESA_SHADER_VERTEX ||
@@ -321,21 +320,22 @@ static void si_lower_nir(struct si_screen *sscreen, 
struct nir_shader *nir)
    }
 
    if (nir->info.stage == MESA_SHADER_COMPUTE) {
+      nir_lower_compute_system_values_options options = {0};
+
+      /* gl_LocalInvocationIndex must be derived from gl_LocalInvocationID.xyz 
to make it correct
+       * with quad derivatives. Using gl_SubgroupID for that (which is what we 
do by default) is
+       * incorrect with a non-linear thread order.
+       */
+      options.lower_local_invocation_index =
+         nir->info.cs.derivative_group == DERIVATIVE_GROUP_QUADS;
+      NIR_PASS_V(nir, nir_lower_compute_system_values, &options);
+
       if (nir->info.cs.derivative_group == DERIVATIVE_GROUP_QUADS) {
-         /* If we are shuffling local_invocation_id for quad derivatives, we
-          * need to derive local_invocation_index from local_invocation_id
-          * first, so that the value corresponds to the shuffled
-          * local_invocation_id.
-          */
-         nir_lower_compute_system_values_options options = {0};
-         options.lower_local_invocation_index = true;
+         nir_opt_cse(nir); /* CSE load_local_invocation_id */
+         memset(&options, 0, sizeof(options));
+         options.shuffle_local_ids_for_quad_derivatives = true;
          NIR_PASS_V(nir, nir_lower_compute_system_values, &options);
       }
-
-      nir_opt_cse(nir); /* CSE load_local_invocation_id */
-      nir_lower_compute_system_values_options options = {0};
-      options.shuffle_local_ids_for_quad_derivatives = true;
-      NIR_PASS_V(nir, nir_lower_compute_system_values, &options);
    }
 
    if (sscreen->b.get_shader_param(&sscreen->b, PIPE_SHADER_FRAGMENT, 
PIPE_SHADER_CAP_FP16)) {

Reply via email to