Mesa (main): radv: cleanup computing the workgroup size for all stages

GitLab Mirror Thu, 01 Sep 2022 10:23:30 -0700

Module: Mesa
Branch: main
Commit: 60a7115b4e904ad171a33154209693e0fecade8f
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=60a7115b4e904ad171a33154209693e0fecade8f


Author: Samuel Pitoiset <[email protected]>
Date:   Fri Aug 26 14:18:33 2022 +0200

radv: cleanup computing the workgroup size for all stages

Signed-off-by: Samuel Pitoiset <[email protected]>
Reviewed-by: Timur Kristóf <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18278>

---

 src/amd/vulkan/radv_pipeline.c    | 35 ---------------
 src/amd/vulkan/radv_shader_info.c | 90 ++++++++++++++++++++++++++-------------
 2 files changed, 60 insertions(+), 65 deletions(-)

diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 402674a7df7..0905506d267 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -2892,41 +2892,6 @@ radv_fill_shader_info(struct radv_pipeline *pipeline,
    }
 
    radv_nir_shader_info_link(device, pipeline_key, stages, pipeline_has_ngg, 
last_vgt_api_stage);
-
-   if (stages[MESA_SHADER_TESS_CTRL].nir) {
-      for (gl_shader_stage s = MESA_SHADER_VERTEX; s <= MESA_SHADER_TESS_CTRL; 
++s) {
-         stages[s].info.workgroup_size =
-            
ac_compute_lshs_workgroup_size(device->physical_device->rad_info.gfx_level, s,
-                                           
stages[MESA_SHADER_TESS_CTRL].info.num_tess_patches,
-                                           
pipeline_key->tcs.tess_input_vertices,
-                                           
stages[MESA_SHADER_TESS_CTRL].info.tcs.tcs_vertices_out);
-      }
-   }
-
-   /* PS always operates without workgroups. */
-   if (stages[MESA_SHADER_FRAGMENT].nir)
-      stages[MESA_SHADER_FRAGMENT].info.workgroup_size = 
stages[MESA_SHADER_FRAGMENT].info.wave_size;
-
-   if (stages[MESA_SHADER_COMPUTE].nir) {
-      /* Variable workgroup size is not supported by Vulkan. */
-      assert(!stages[MESA_SHADER_COMPUTE].nir->info.workgroup_size_variable);
-
-      stages[MESA_SHADER_COMPUTE].info.workgroup_size =
-         ac_compute_cs_workgroup_size(
-            stages[MESA_SHADER_COMPUTE].nir->info.workgroup_size, false, 
UINT32_MAX);
-   }
-
-   if (stages[MESA_SHADER_TASK].nir) {
-      stages[MESA_SHADER_TASK].info.workgroup_size =
-         ac_compute_cs_workgroup_size(
-            stages[MESA_SHADER_TASK].nir->info.workgroup_size, false, 
UINT32_MAX);
-   }
-
-   if (!pipeline_has_ngg && !stages[MESA_SHADER_GEOMETRY].nir) {
-      gl_shader_stage hw_vs_api_stage =
-         stages[MESA_SHADER_TESS_EVAL].nir ? MESA_SHADER_TESS_EVAL : 
MESA_SHADER_VERTEX;
-      stages[hw_vs_api_stage].info.workgroup_size = 
stages[hw_vs_api_stage].info.wave_size;
-   }
 }
 
 static void
diff --git a/src/amd/vulkan/radv_shader_info.c 
b/src/amd/vulkan/radv_shader_info.c
index e69b2ce5641..383b4894edd 100644
--- a/src/amd/vulkan/radv_shader_info.c
+++ b/src/amd/vulkan/radv_shader_info.c
@@ -796,6 +796,23 @@ radv_nir_shader_info_pass(struct radv_device *device, 
const struct nir_shader *n
 
    info->wave_size = radv_get_wave_size(device, nir->info.stage, info);
    info->ballot_bit_size = radv_get_ballot_bit_size(device, nir->info.stage, 
info);
+
+   switch (nir->info.stage) {
+   case MESA_SHADER_COMPUTE:
+   case MESA_SHADER_TASK:
+      info->workgroup_size =
+         ac_compute_cs_workgroup_size(nir->info.workgroup_size, false, 
UINT32_MAX);
+      break;
+   case MESA_SHADER_MESH:
+      /* Already computed in gather_shader_info_mesh(). */
+      break;
+   default:
+      /* FS always operates without workgroups. Other stages are computed 
during linking but assume
+       * no workgroups by default.
+       */
+      info->workgroup_size = info->wave_size;
+      break;
+   }
 }
 
 static void
@@ -1273,39 +1290,52 @@ radv_link_shaders_info(struct radv_device *device,
       }
    }
 
-   if (producer->stage == MESA_SHADER_VERTEX && consumer->stage == 
MESA_SHADER_TESS_CTRL &&
-       !radv_use_llvm_for_stage(device, MESA_SHADER_VERTEX)) {
+   if (producer->stage == MESA_SHADER_VERTEX && consumer->stage == 
MESA_SHADER_TESS_CTRL) {
       struct radv_pipeline_stage *vs_stage = producer;
       struct radv_pipeline_stage *tcs_stage = consumer;
 
-      /* When the number of TCS input and output vertices are the same 
(typically 3):
-       * - There is an equal amount of LS and HS invocations
-       * - In case of merged LSHS shaders, the LS and HS halves of the shader 
always process the
-       *   exact same vertex. We can use this knowledge to optimize them.
-       *
-       * We don't set tcs_in_out_eq if the float controls differ because that 
might involve
-       * different float modes for the same block and our optimizer doesn't 
handle a instruction
-       * dominating another with a different mode.
-       */
-      vs_stage->info.vs.tcs_in_out_eq =
-         device->physical_device->rad_info.gfx_level >= GFX9 &&
-         pipeline_key->tcs.tess_input_vertices == 
tcs_stage->info.tcs.tcs_vertices_out &&
-         vs_stage->nir->info.float_controls_execution_mode ==
-            tcs_stage->nir->info.float_controls_execution_mode;
-
-      if (vs_stage->info.vs.tcs_in_out_eq)
-         vs_stage->info.vs.tcs_temp_only_input_mask =
-            tcs_stage->nir->info.inputs_read &
-            vs_stage->nir->info.outputs_written &
-            ~tcs_stage->nir->info.tess.tcs_cross_invocation_inputs_read &
-            ~tcs_stage->nir->info.inputs_read_indirectly &
-            ~vs_stage->nir->info.outputs_accessed_indirectly;
-
-      /* Copy data to TCS so it can be accessed by the backend if they are 
merged. */
-      tcs_stage->info.vs.tcs_in_out_eq =
-         vs_stage->info.vs.tcs_in_out_eq;
-      tcs_stage->info.vs.tcs_temp_only_input_mask =
-         vs_stage->info.vs.tcs_temp_only_input_mask;
+      vs_stage->info.workgroup_size =
+         
ac_compute_lshs_workgroup_size(device->physical_device->rad_info.gfx_level,
+                                        MESA_SHADER_VERTEX, 
tcs_stage->info.num_tess_patches,
+                                        pipeline_key->tcs.tess_input_vertices,
+                                        tcs_stage->info.tcs.tcs_vertices_out);
+
+      tcs_stage->info.workgroup_size =
+         
ac_compute_lshs_workgroup_size(device->physical_device->rad_info.gfx_level,
+                                        MESA_SHADER_TESS_CTRL, 
tcs_stage->info.num_tess_patches,
+                                        pipeline_key->tcs.tess_input_vertices,
+                                        tcs_stage->info.tcs.tcs_vertices_out);
+
+      if (!radv_use_llvm_for_stage(device, MESA_SHADER_VERTEX)) {
+         /* When the number of TCS input and output vertices are the same 
(typically 3):
+          * - There is an equal amount of LS and HS invocations
+          * - In case of merged LSHS shaders, the LS and HS halves of the 
shader always process the
+          *   exact same vertex. We can use this knowledge to optimize them.
+          *
+          * We don't set tcs_in_out_eq if the float controls differ because 
that might involve
+          * different float modes for the same block and our optimizer doesn't 
handle a instruction
+          * dominating another with a different mode.
+          */
+         vs_stage->info.vs.tcs_in_out_eq =
+            device->physical_device->rad_info.gfx_level >= GFX9 &&
+            pipeline_key->tcs.tess_input_vertices == 
tcs_stage->info.tcs.tcs_vertices_out &&
+            vs_stage->nir->info.float_controls_execution_mode ==
+               tcs_stage->nir->info.float_controls_execution_mode;
+
+         if (vs_stage->info.vs.tcs_in_out_eq)
+            vs_stage->info.vs.tcs_temp_only_input_mask =
+               tcs_stage->nir->info.inputs_read &
+               vs_stage->nir->info.outputs_written &
+               ~tcs_stage->nir->info.tess.tcs_cross_invocation_inputs_read &
+               ~tcs_stage->nir->info.inputs_read_indirectly &
+               ~vs_stage->nir->info.outputs_accessed_indirectly;
+
+         /* Copy data to TCS so it can be accessed by the backend if they are 
merged. */
+         tcs_stage->info.vs.tcs_in_out_eq =
+            vs_stage->info.vs.tcs_in_out_eq;
+         tcs_stage->info.vs.tcs_temp_only_input_mask =
+            vs_stage->info.vs.tcs_temp_only_input_mask;
+      }
    }
 
    /* Copy shader info between TCS<->TES. */

Mesa (main): radv: cleanup computing the workgroup size for all stages

Reply via email to