Module: Mesa
Branch: main
Commit: f6147051e2b3d01d8855c3bd89ad3d2ffee24bcd
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=f6147051e2b3d01d8855c3bd89ad3d2ffee24bcd

Author: Konstantin Seurer <[email protected]>
Date:   Sun Mar 26 22:06:31 2023 +0200

radv: Stop counting user SGPRS separately

Renames radv_declare_shader_args to declare_shader_args and runs it
twice to first gather the user SGPR count without push constants and
descriptor sets.

Reviewed-by: Samuel Pitoiset <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22119>

---

 src/amd/vulkan/radv_shader_args.c | 266 +++++++++++---------------------------
 1 file changed, 72 insertions(+), 194 deletions(-)

diff --git a/src/amd/vulkan/radv_shader_args.c 
b/src/amd/vulkan/radv_shader_args.c
index aa14e5023a2..a3761322589 100644
--- a/src/amd/vulkan/radv_shader_args.c
+++ b/src/amd/vulkan/radv_shader_args.c
@@ -37,62 +37,6 @@ struct user_sgpr_info {
    uint8_t remaining_sgprs;
 };
 
-static uint8_t
-count_vs_user_sgprs(const struct radv_shader_info *info)
-{
-   uint8_t count = 1; /* vertex offset */
-
-   if (info->vs.vb_desc_usage_mask)
-      count++;
-   if (info->vs.needs_draw_id)
-      count++;
-   if (info->vs.needs_base_instance)
-      count++;
-   if (info->vs.dynamic_num_verts_per_prim)
-      count++;
-
-   return count;
-}
-
-static uint8_t
-count_tes_user_sgprs(const struct radv_pipeline_key *key)
-{
-   unsigned count = 0;
-
-   if (key->dynamic_patch_control_points)
-      count++; /* tes_num_patches */
-
-   return count;
-}
-
-static uint8_t
-count_ms_user_sgprs(const struct radv_shader_info *info)
-{
-   uint8_t count = 3; /* num_work_groups[3] */
-
-   if (info->vs.needs_draw_id)
-      count++;
-   if (info->ms.has_task)
-      count++;
-
-   return count;
-}
-
-static unsigned
-count_ngg_sgprs(const struct radv_shader_info *info, bool has_ngg_query, bool 
has_ngg_provoking_vtx)
-{
-   unsigned count = 0;
-
-   if (has_ngg_query)
-      count += 1; /* ngg_query_state */
-   if (has_ngg_provoking_vtx)
-      count += 1; /* ngg_provoking_vtx */
-   if (info->has_ngg_culling)
-      count += 5; /* ngg_culling_settings + 4x ngg_viewport_* */
-
-   return count;
-}
-
 static void
 allocate_inline_push_consts(const struct radv_shader_info *info,
                             struct user_sgpr_info *user_sgpr_info)
@@ -124,110 +68,6 @@ allocate_inline_push_consts(const struct radv_shader_info 
*info,
    user_sgpr_info->inline_push_constant_mask = mask;
 }
 
-static void
-allocate_user_sgprs(enum amd_gfx_level gfx_level, const struct 
radv_shader_info *info,
-                    struct radv_shader_args *args, gl_shader_stage stage, bool 
has_previous_stage,
-                    gl_shader_stage previous_stage, bool needs_view_index, 
bool has_ngg_query,
-                    bool has_ngg_provoking_vtx, const struct radv_pipeline_key 
*key,
-                    struct user_sgpr_info *user_sgpr_info)
-{
-   uint8_t user_sgpr_count = 0;
-
-   memset(user_sgpr_info, 0, sizeof(struct user_sgpr_info));
-
-   /* 2 user sgprs will always be allocated for scratch/rings */
-   user_sgpr_count += 2;
-
-   if (stage == MESA_SHADER_TASK)
-      user_sgpr_count += 2; /* task descriptors */
-
-   /* prolog inputs */
-   if (info->vs.has_prolog)
-      user_sgpr_count += 2;
-
-   switch (stage) {
-   case MESA_SHADER_COMPUTE:
-   case MESA_SHADER_TASK:
-      if (info->cs.is_rt_shader)
-         user_sgpr_count += 2; /* SBT descriptors */
-      if (info->cs.uses_grid_size)
-         user_sgpr_count += args->load_grid_size_from_user_sgpr ? 3 : 2;
-      if (info->cs.uses_ray_launch_size)
-         user_sgpr_count += 2;
-      if (info->cs.uses_dynamic_rt_callable_stack)
-         user_sgpr_count += 1;
-      if (info->vs.needs_draw_id)
-         user_sgpr_count += 1;
-      if (stage == MESA_SHADER_TASK)
-         user_sgpr_count += 4; /* ring_entry, 2x ib_addr, ib_stride */
-      break;
-   case MESA_SHADER_FRAGMENT:
-      /* epilog continue PC */
-      if (info->ps.has_epilog)
-         user_sgpr_count += 1;
-      if (info->ps.needs_sample_positions && 
key->dynamic_rasterization_samples)
-         user_sgpr_count += 1;
-      break;
-   case MESA_SHADER_VERTEX:
-      if (!args->is_gs_copy_shader)
-         user_sgpr_count += count_vs_user_sgprs(info);
-      break;
-   case MESA_SHADER_TESS_CTRL:
-      if (has_previous_stage) {
-         if (previous_stage == MESA_SHADER_VERTEX)
-            user_sgpr_count += count_vs_user_sgprs(info);
-      }
-      if (key->dynamic_patch_control_points)
-         user_sgpr_count += 1; /* tcs_offchip_layout */
-      break;
-   case MESA_SHADER_TESS_EVAL:
-      user_sgpr_count += count_tes_user_sgprs(key);
-      break;
-   case MESA_SHADER_GEOMETRY:
-      if (has_previous_stage) {
-         if (info->is_ngg)
-            user_sgpr_count += count_ngg_sgprs(info, has_ngg_query, 
has_ngg_provoking_vtx);
-
-         if (previous_stage == MESA_SHADER_VERTEX) {
-            user_sgpr_count += count_vs_user_sgprs(info);
-         } else if (previous_stage == MESA_SHADER_TESS_EVAL) {
-            user_sgpr_count += count_tes_user_sgprs(key);
-         } else if (previous_stage == MESA_SHADER_MESH) {
-            user_sgpr_count += count_ms_user_sgprs(info);
-         }
-      }
-      break;
-   default:
-      break;
-   }
-
-   if (needs_view_index)
-      user_sgpr_count++;
-
-   if (info->force_vrs_per_vertex)
-      user_sgpr_count++;
-
-   if (info->loads_push_constants)
-      user_sgpr_count++;
-
-   if (info->so.num_outputs)
-      user_sgpr_count++;
-
-   uint32_t available_sgprs =
-      gfx_level >= GFX9 && stage != MESA_SHADER_COMPUTE && stage != 
MESA_SHADER_TASK ? 32 : 16;
-   uint32_t remaining_sgprs = available_sgprs - user_sgpr_count;
-   uint32_t num_desc_set = util_bitcount(info->desc_set_used_mask);
-
-   if (remaining_sgprs < num_desc_set) {
-      user_sgpr_info->indirect_all_descriptor_sets = true;
-      user_sgpr_info->remaining_sgprs = remaining_sgprs - 1;
-   } else {
-      user_sgpr_info->remaining_sgprs = remaining_sgprs - num_desc_set;
-   }
-
-   allocate_inline_push_consts(info, user_sgpr_info);
-}
-
 static void
 add_ud_arg(struct radv_shader_args *args, unsigned size, enum ac_arg_type 
type, struct ac_arg *arg,
            enum radv_ud_index ud)
@@ -263,29 +103,32 @@ declare_global_input_sgprs(const struct radv_shader_info 
*info,
                            const struct user_sgpr_info *user_sgpr_info,
                            struct radv_shader_args *args)
 {
-   /* 1 for each descriptor set */
-   if (!user_sgpr_info->indirect_all_descriptor_sets) {
-      uint32_t mask = info->desc_set_used_mask;
+   if (user_sgpr_info) {
+      /* 1 for each descriptor set */
+      if (!user_sgpr_info->indirect_all_descriptor_sets) {
+         uint32_t mask = info->desc_set_used_mask;
 
-      while (mask) {
-         int i = u_bit_scan(&mask);
+         while (mask) {
+            int i = u_bit_scan(&mask);
 
-         add_descriptor_set(args, AC_ARG_CONST_PTR, &args->descriptor_sets[i], 
i);
+            add_descriptor_set(args, AC_ARG_CONST_PTR, 
&args->descriptor_sets[i], i);
+         }
+      } else {
+         add_ud_arg(args, 1, AC_ARG_CONST_PTR_PTR, &args->descriptor_sets[0],
+                    AC_UD_INDIRECT_DESCRIPTOR_SETS);
       }
-   } else {
-      add_ud_arg(args, 1, AC_ARG_CONST_PTR_PTR, &args->descriptor_sets[0],
-                 AC_UD_INDIRECT_DESCRIPTOR_SETS);
-   }
 
-   if (info->loads_push_constants && !user_sgpr_info->inlined_all_push_consts) 
{
-      /* 1 for push constants and dynamic descriptors */
-      add_ud_arg(args, 1, AC_ARG_CONST_PTR, &args->ac.push_constants, 
AC_UD_PUSH_CONSTANTS);
-   }
+      if (info->loads_push_constants && 
!user_sgpr_info->inlined_all_push_consts) {
+         /* 1 for push constants and dynamic descriptors */
+         add_ud_arg(args, 1, AC_ARG_CONST_PTR, &args->ac.push_constants, 
AC_UD_PUSH_CONSTANTS);
+      }
 
-   for (unsigned i = 0; i < 
util_bitcount64(user_sgpr_info->inline_push_constant_mask); i++) {
-      add_ud_arg(args, 1, AC_ARG_INT, &args->ac.inline_push_consts[i], 
AC_UD_INLINE_PUSH_CONSTANTS);
+      for (unsigned i = 0; i < 
util_bitcount64(user_sgpr_info->inline_push_constant_mask); i++) {
+         add_ud_arg(args, 1, AC_ARG_INT, &args->ac.inline_push_consts[i],
+                    AC_UD_INLINE_PUSH_CONSTANTS);
+      }
+      args->ac.inline_push_const_mask = 
user_sgpr_info->inline_push_constant_mask;
    }
-   args->ac.inline_push_const_mask = user_sgpr_info->inline_push_constant_mask;
 
    if (info->so.num_outputs) {
       add_ud_arg(args, 1, AC_ARG_CONST_DESC_PTR, &args->streamout_buffers, 
AC_UD_STREAMOUT_BUFFERS);
@@ -529,14 +372,13 @@ radv_declare_rt_shader_args(enum amd_gfx_level gfx_level, 
struct radv_shader_arg
    ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, 
&args->ac.rt_dynamic_callable_stack_base);
 }
 
-void
-radv_declare_shader_args(const struct radv_device *device, const struct 
radv_pipeline_key *key,
-                         const struct radv_shader_info *info, gl_shader_stage 
stage,
-                         bool has_previous_stage, gl_shader_stage 
previous_stage,
-                         struct radv_shader_args *args)
+static void
+declare_shader_args(const struct radv_device *device, const struct 
radv_pipeline_key *key,
+                    const struct radv_shader_info *info, gl_shader_stage stage,
+                    bool has_previous_stage, gl_shader_stage previous_stage,
+                    struct radv_shader_args *args, struct user_sgpr_info 
*user_sgpr_info)
 {
    const enum amd_gfx_level gfx_level = 
device->physical_device->rad_info.gfx_level;
-   struct user_sgpr_info user_sgpr_info;
    bool needs_view_index = info->uses_view_index;
    bool has_ngg_query = info->has_ngg_prim_query || info->has_ngg_xfb_query ||
                         (stage == MESA_SHADER_GEOMETRY && 
info->gs.has_ngg_pipeline_stat_query);
@@ -557,9 +399,6 @@ radv_declare_shader_args(const struct radv_device *device, 
const struct radv_pip
       return;
    }
 
-   allocate_user_sgprs(gfx_level, info, args, stage, has_previous_stage, 
previous_stage,
-                       needs_view_index, has_ngg_query, has_ngg_provoking_vtx, 
key, &user_sgpr_info);
-
    add_ud_arg(args, 2, AC_ARG_CONST_DESC_PTR, &args->ac.ring_offsets, 
AC_UD_SCRATCH_RING_OFFSETS);
    if (stage == MESA_SHADER_TASK) {
       add_ud_arg(args, 2, AC_ARG_CONST_DESC_PTR, &args->task_ring_offsets,
@@ -579,7 +418,7 @@ radv_declare_shader_args(const struct radv_device *device, 
const struct radv_pip
    switch (stage) {
    case MESA_SHADER_COMPUTE:
    case MESA_SHADER_TASK:
-      declare_global_input_sgprs(info, &user_sgpr_info, args);
+      declare_global_input_sgprs(info, user_sgpr_info, args);
 
       if (info->cs.uses_grid_size) {
          if (args->load_grid_size_from_user_sgpr)
@@ -632,7 +471,7 @@ radv_declare_shader_args(const struct radv_device *device, 
const struct radv_pip
 
       declare_vs_specific_input_sgprs(info, args, stage, has_previous_stage, 
previous_stage);
 
-      declare_global_input_sgprs(info, &user_sgpr_info, args);
+      declare_global_input_sgprs(info, user_sgpr_info, args);
 
       if (needs_view_index) {
          add_ud_arg(args, 1, AC_ARG_INT, &args->ac.view_index, 
AC_UD_VIEW_INDEX);
@@ -674,7 +513,7 @@ radv_declare_shader_args(const struct radv_device *device, 
const struct radv_pip
 
          declare_vs_specific_input_sgprs(info, args, stage, 
has_previous_stage, previous_stage);
 
-         declare_global_input_sgprs(info, &user_sgpr_info, args);
+         declare_global_input_sgprs(info, user_sgpr_info, args);
 
          if (needs_view_index) {
             add_ud_arg(args, 1, AC_ARG_INT, &args->ac.view_index, 
AC_UD_VIEW_INDEX);
@@ -689,7 +528,7 @@ radv_declare_shader_args(const struct radv_device *device, 
const struct radv_pip
 
          declare_vs_input_vgprs(gfx_level, info, args, true);
       } else {
-         declare_global_input_sgprs(info, &user_sgpr_info, args);
+         declare_global_input_sgprs(info, user_sgpr_info, args);
 
          if (needs_view_index) {
             add_ud_arg(args, 1, AC_ARG_INT, &args->ac.view_index, 
AC_UD_VIEW_INDEX);
@@ -712,7 +551,7 @@ radv_declare_shader_args(const struct radv_device *device, 
const struct radv_pip
       /* NGG is handled by the GS case */
       assert(!info->is_ngg);
 
-      declare_global_input_sgprs(info, &user_sgpr_info, args);
+      declare_global_input_sgprs(info, user_sgpr_info, args);
 
       if (needs_view_index)
          add_ud_arg(args, 1, AC_ARG_INT, &args->ac.view_index, 
AC_UD_VIEW_INDEX);
@@ -760,7 +599,7 @@ radv_declare_shader_args(const struct radv_device *device, 
const struct radv_pip
             declare_ms_input_sgprs(info, args);
          }
 
-         declare_global_input_sgprs(info, &user_sgpr_info, args);
+         declare_global_input_sgprs(info, user_sgpr_info, args);
 
          if (needs_view_index) {
             add_ud_arg(args, 1, AC_ARG_INT, &args->ac.view_index, 
AC_UD_VIEW_INDEX);
@@ -795,7 +634,7 @@ radv_declare_shader_args(const struct radv_device *device, 
const struct radv_pip
             declare_ms_input_vgprs(args);
          }
       } else {
-         declare_global_input_sgprs(info, &user_sgpr_info, args);
+         declare_global_input_sgprs(info, user_sgpr_info, args);
 
          if (needs_view_index) {
             add_ud_arg(args, 1, AC_ARG_INT, &args->ac.view_index, 
AC_UD_VIEW_INDEX);
@@ -821,7 +660,7 @@ radv_declare_shader_args(const struct radv_device *device, 
const struct radv_pip
       }
       break;
    case MESA_SHADER_FRAGMENT:
-      declare_global_input_sgprs(info, &user_sgpr_info, args);
+      declare_global_input_sgprs(info, user_sgpr_info, args);
 
       if (info->ps.has_epilog) {
          add_ud_arg(args, 1, AC_ARG_INT, &args->ps_epilog_pc, 
AC_UD_PS_EPILOG_PC);
@@ -843,6 +682,45 @@ radv_declare_shader_args(const struct radv_device *device, 
const struct radv_pip
    }
 }
 
+void
+radv_declare_shader_args(const struct radv_device *device, const struct 
radv_pipeline_key *key,
+                         const struct radv_shader_info *info, gl_shader_stage 
stage,
+                         bool has_previous_stage, gl_shader_stage 
previous_stage,
+                         struct radv_shader_args *args)
+{
+   declare_shader_args(device, key, info, stage, has_previous_stage, 
previous_stage, args, NULL);
+
+   if (gl_shader_stage_is_rt(stage))
+      return;
+
+   uint32_t num_user_sgprs = args->num_user_sgprs;
+   if (info->loads_push_constants)
+      num_user_sgprs++;
+
+   const enum amd_gfx_level gfx_level = 
device->physical_device->rad_info.gfx_level;
+   uint32_t available_sgprs =
+      gfx_level >= GFX9 && stage != MESA_SHADER_COMPUTE && stage != 
MESA_SHADER_TASK ? 32 : 16;
+   uint32_t remaining_sgprs = available_sgprs - num_user_sgprs;
+
+   struct user_sgpr_info user_sgpr_info = {
+      .remaining_sgprs = remaining_sgprs,
+   };
+
+   uint32_t num_desc_set = util_bitcount(info->desc_set_used_mask);
+
+   if (remaining_sgprs < num_desc_set) {
+      user_sgpr_info.indirect_all_descriptor_sets = true;
+      user_sgpr_info.remaining_sgprs--;
+   } else {
+      user_sgpr_info.remaining_sgprs -= num_desc_set;
+   }
+
+   allocate_inline_push_consts(info, &user_sgpr_info);
+
+   declare_shader_args(device, key, info, stage, has_previous_stage, 
previous_stage, args,
+                       &user_sgpr_info);
+}
+
 void
 radv_declare_ps_epilog_args(const struct radv_device *device, const struct 
radv_ps_epilog_key *key,
                             struct radv_shader_args *args)

Reply via email to