Module: Mesa
Branch: main
Commit: 13c55a8e869e53333c832b1f28e20162ebeb2686
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=13c55a8e869e53333c832b1f28e20162ebeb2686

Author: Daniel Schürmann <[email protected]>
Date:   Thu Mar 16 22:24:49 2023 +0100

radv: remove unnecessary copy of binary->config

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22029>

---

 src/amd/vulkan/radv_shader.c | 141 ++++++++++++++++++++-----------------------
 1 file changed, 65 insertions(+), 76 deletions(-)

diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 8a08e919c08..c9046742297 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -1821,38 +1821,36 @@ radv_should_use_wgp_mode(const struct radv_device 
*device, gl_shader_stage stage
 }
 
 static void
-radv_postprocess_config(const struct radv_device *device, const struct 
ac_shader_config *config_in,
+radv_postprocess_config(const struct radv_device *device, struct 
ac_shader_config *config,
                         const struct radv_shader_info *info, gl_shader_stage 
stage,
-                        const struct radv_shader_args *args,
-                        struct ac_shader_config *config_out)
+                        const struct radv_shader_args *args)
 {
    const struct radv_physical_device *pdevice = device->physical_device;
-   bool scratch_enabled = config_in->scratch_bytes_per_wave > 0 || 
info->cs.is_rt_shader;
+   bool scratch_enabled = config->scratch_bytes_per_wave > 0 || 
info->cs.is_rt_shader;
    bool trap_enabled = !!device->trap_handler_shader;
    unsigned vgpr_comp_cnt = 0;
    unsigned num_input_vgprs = args->ac.num_vgprs_used;
 
    if (stage == MESA_SHADER_FRAGMENT) {
-      num_input_vgprs = ac_get_fs_input_vgpr_cnt(config_in, NULL, NULL, NULL);
+      num_input_vgprs = ac_get_fs_input_vgpr_cnt(config, NULL, NULL, NULL);
    }
 
-   unsigned num_vgprs = MAX2(config_in->num_vgprs, num_input_vgprs);
+   unsigned num_vgprs = MAX2(config->num_vgprs, num_input_vgprs);
    /* +2 for the ring offsets, +3 for scratch wave offset and VCC */
-   unsigned num_sgprs = MAX2(config_in->num_sgprs, args->ac.num_sgprs_used + 2 
+ 3);
-   unsigned num_shared_vgprs = config_in->num_shared_vgprs;
+   unsigned num_sgprs = MAX2(config->num_sgprs, args->ac.num_sgprs_used + 2 + 
3);
+   unsigned num_shared_vgprs = config->num_shared_vgprs;
    /* shared VGPRs are introduced in Navi and are allocated in blocks of 8 
(RDNA ref 3.6.5) */
    assert((pdevice->rad_info.gfx_level >= GFX10 && num_shared_vgprs % 8 == 0) 
||
           (pdevice->rad_info.gfx_level < GFX10 && num_shared_vgprs == 0));
    unsigned num_shared_vgpr_blocks = num_shared_vgprs / 8;
    unsigned excp_en = 0;
 
-   *config_out = *config_in;
-   config_out->num_vgprs = num_vgprs;
-   config_out->num_sgprs = num_sgprs;
-   config_out->num_shared_vgprs = num_shared_vgprs;
+   config->num_vgprs = num_vgprs;
+   config->num_sgprs = num_sgprs;
+   config->num_shared_vgprs = num_shared_vgprs;
 
-   config_out->rsrc2 = S_00B12C_USER_SGPR(args->num_user_sgprs) |
-                       S_00B12C_SCRATCH_EN(scratch_enabled) | 
S_00B12C_TRAP_PRESENT(trap_enabled);
+   config->rsrc2 = S_00B12C_USER_SGPR(args->num_user_sgprs) | 
S_00B12C_SCRATCH_EN(scratch_enabled) |
+                   S_00B12C_TRAP_PRESENT(trap_enabled);
 
    if (trap_enabled) {
       /* Configure the shader exceptions like memory violation, etc.
@@ -1862,20 +1860,20 @@ radv_postprocess_config(const struct radv_device 
*device, const struct ac_shader
    }
 
    if (!pdevice->use_ngg_streamout) {
-      config_out->rsrc2 |=
+      config->rsrc2 |=
          S_00B12C_SO_BASE0_EN(!!info->so.strides[0]) | 
S_00B12C_SO_BASE1_EN(!!info->so.strides[1]) |
          S_00B12C_SO_BASE2_EN(!!info->so.strides[2]) | 
S_00B12C_SO_BASE3_EN(!!info->so.strides[3]) |
          S_00B12C_SO_EN(!!info->so.num_outputs);
    }
 
-   config_out->rsrc1 = S_00B848_VGPRS((num_vgprs - 1) / (info->wave_size == 32 
? 8 : 4)) |
-                       S_00B848_DX10_CLAMP(1) | 
S_00B848_FLOAT_MODE(config_out->float_mode);
+   config->rsrc1 = S_00B848_VGPRS((num_vgprs - 1) / (info->wave_size == 32 ? 8 
: 4)) |
+                   S_00B848_DX10_CLAMP(1) | 
S_00B848_FLOAT_MODE(config->float_mode);
 
    if (pdevice->rad_info.gfx_level >= GFX10) {
-      config_out->rsrc2 |= S_00B22C_USER_SGPR_MSB_GFX10(args->num_user_sgprs 
>> 5);
+      config->rsrc2 |= S_00B22C_USER_SGPR_MSB_GFX10(args->num_user_sgprs >> 5);
    } else {
-      config_out->rsrc1 |= S_00B228_SGPRS((num_sgprs - 1) / 8);
-      config_out->rsrc2 |= S_00B22C_USER_SGPR_MSB_GFX9(args->num_user_sgprs >> 
5);
+      config->rsrc1 |= S_00B228_SGPRS((num_sgprs - 1) / 8);
+      config->rsrc2 |= S_00B22C_USER_SGPR_MSB_GFX9(args->num_user_sgprs >> 5);
    }
 
    bool wgp_mode = radv_should_use_wgp_mode(device, stage, info);
@@ -1883,21 +1881,21 @@ radv_postprocess_config(const struct radv_device 
*device, const struct ac_shader
    switch (stage) {
    case MESA_SHADER_TESS_EVAL:
       if (info->is_ngg) {
-         config_out->rsrc1 |= S_00B228_MEM_ORDERED(pdevice->rad_info.gfx_level 
>= GFX10);
-         config_out->rsrc2 |= S_00B22C_OC_LDS_EN(1) | 
S_00B22C_EXCP_EN(excp_en);
+         config->rsrc1 |= S_00B228_MEM_ORDERED(pdevice->rad_info.gfx_level >= 
GFX10);
+         config->rsrc2 |= S_00B22C_OC_LDS_EN(1) | S_00B22C_EXCP_EN(excp_en);
       } else if (info->tes.as_es) {
          assert(pdevice->rad_info.gfx_level <= GFX8);
          vgpr_comp_cnt = info->uses_prim_id ? 3 : 2;
 
-         config_out->rsrc2 |= S_00B12C_OC_LDS_EN(1) | 
S_00B12C_EXCP_EN(excp_en);
+         config->rsrc2 |= S_00B12C_OC_LDS_EN(1) | S_00B12C_EXCP_EN(excp_en);
       } else {
          bool enable_prim_id = info->outinfo.export_prim_id || 
info->uses_prim_id;
          vgpr_comp_cnt = enable_prim_id ? 3 : 2;
 
-         config_out->rsrc1 |= S_00B128_MEM_ORDERED(pdevice->rad_info.gfx_level 
>= GFX10);
-         config_out->rsrc2 |= S_00B12C_OC_LDS_EN(1) | 
S_00B12C_EXCP_EN(excp_en);
+         config->rsrc1 |= S_00B128_MEM_ORDERED(pdevice->rad_info.gfx_level >= 
GFX10);
+         config->rsrc2 |= S_00B12C_OC_LDS_EN(1) | S_00B12C_EXCP_EN(excp_en);
       }
-      config_out->rsrc2 |= S_00B22C_SHARED_VGPR_CNT(num_shared_vgpr_blocks);
+      config->rsrc2 |= S_00B22C_SHARED_VGPR_CNT(num_shared_vgpr_blocks);
       break;
    case MESA_SHADER_TESS_CTRL:
       if (pdevice->rad_info.gfx_level >= GFX9) {
@@ -1911,21 +1909,21 @@ radv_postprocess_config(const struct radv_device 
*device, const struct ac_shader
             } else if (pdevice->rad_info.gfx_level <= GFX10_3) {
                vgpr_comp_cnt = 1;
             }
-            config_out->rsrc2 |= S_00B42C_EXCP_EN_GFX6(excp_en);
+            config->rsrc2 |= S_00B42C_EXCP_EN_GFX6(excp_en);
          } else {
             vgpr_comp_cnt = info->vs.needs_instance_id ? 2 : 1;
-            config_out->rsrc2 |= S_00B42C_EXCP_EN_GFX9(excp_en);
+            config->rsrc2 |= S_00B42C_EXCP_EN_GFX9(excp_en);
          }
       } else {
-         config_out->rsrc2 |= S_00B12C_OC_LDS_EN(1) | 
S_00B12C_EXCP_EN(excp_en);
+         config->rsrc2 |= S_00B12C_OC_LDS_EN(1) | S_00B12C_EXCP_EN(excp_en);
       }
-      config_out->rsrc1 |=
+      config->rsrc1 |=
          S_00B428_MEM_ORDERED(pdevice->rad_info.gfx_level >= GFX10) | 
S_00B428_WGP_MODE(wgp_mode);
-      config_out->rsrc2 |= S_00B42C_SHARED_VGPR_CNT(num_shared_vgpr_blocks);
+      config->rsrc2 |= S_00B42C_SHARED_VGPR_CNT(num_shared_vgpr_blocks);
       break;
    case MESA_SHADER_VERTEX:
       if (info->is_ngg) {
-         config_out->rsrc1 |= S_00B228_MEM_ORDERED(pdevice->rad_info.gfx_level 
>= GFX10);
+         config->rsrc1 |= S_00B228_MEM_ORDERED(pdevice->rad_info.gfx_level >= 
GFX10);
       } else if (info->vs.as_ls) {
          assert(pdevice->rad_info.gfx_level <= GFX8);
          /* We need at least 2 components for LS.
@@ -1952,25 +1950,21 @@ radv_postprocess_config(const struct radv_device 
*device, const struct ac_shader
             vgpr_comp_cnt = 0;
          }
 
-         config_out->rsrc1 |= S_00B128_MEM_ORDERED(pdevice->rad_info.gfx_level 
>= GFX10);
+         config->rsrc1 |= S_00B128_MEM_ORDERED(pdevice->rad_info.gfx_level >= 
GFX10);
       }
-      config_out->rsrc2 |=
-         S_00B12C_SHARED_VGPR_CNT(num_shared_vgpr_blocks) | 
S_00B12C_EXCP_EN(excp_en);
+      config->rsrc2 |= S_00B12C_SHARED_VGPR_CNT(num_shared_vgpr_blocks) | 
S_00B12C_EXCP_EN(excp_en);
       break;
    case MESA_SHADER_MESH:
-      config_out->rsrc1 |= S_00B228_MEM_ORDERED(1);
-      config_out->rsrc2 |=
-         S_00B12C_SHARED_VGPR_CNT(num_shared_vgpr_blocks) | 
S_00B12C_EXCP_EN(excp_en);
+      config->rsrc1 |= S_00B228_MEM_ORDERED(1);
+      config->rsrc2 |= S_00B12C_SHARED_VGPR_CNT(num_shared_vgpr_blocks) | 
S_00B12C_EXCP_EN(excp_en);
       break;
    case MESA_SHADER_FRAGMENT:
-      config_out->rsrc1 |= S_00B028_MEM_ORDERED(pdevice->rad_info.gfx_level >= 
GFX10);
-      config_out->rsrc2 |= S_00B02C_SHARED_VGPR_CNT(num_shared_vgpr_blocks) |
-                           S_00B02C_EXCP_EN(excp_en);
+      config->rsrc1 |= S_00B028_MEM_ORDERED(pdevice->rad_info.gfx_level >= 
GFX10);
+      config->rsrc2 |= S_00B02C_SHARED_VGPR_CNT(num_shared_vgpr_blocks) | 
S_00B02C_EXCP_EN(excp_en);
       break;
    case MESA_SHADER_GEOMETRY:
-      config_out->rsrc1 |= S_00B228_MEM_ORDERED(pdevice->rad_info.gfx_level >= 
GFX10);
-      config_out->rsrc2 |=
-         S_00B22C_SHARED_VGPR_CNT(num_shared_vgpr_blocks) | 
S_00B22C_EXCP_EN(excp_en);
+      config->rsrc1 |= S_00B228_MEM_ORDERED(pdevice->rad_info.gfx_level >= 
GFX10);
+      config->rsrc2 |= S_00B22C_SHARED_VGPR_CNT(num_shared_vgpr_blocks) | 
S_00B22C_EXCP_EN(excp_en);
       break;
    case MESA_SHADER_RAYGEN:
    case MESA_SHADER_CLOSEST_HIT:
@@ -1978,21 +1972,21 @@ radv_postprocess_config(const struct radv_device 
*device, const struct ac_shader
    case MESA_SHADER_CALLABLE:
    case MESA_SHADER_INTERSECTION:
    case MESA_SHADER_ANY_HIT:
-      config_out->rsrc2 |= S_00B12C_SCRATCH_EN(1);
+      config->rsrc2 |= S_00B12C_SCRATCH_EN(1);
       FALLTHROUGH;
    case MESA_SHADER_COMPUTE:
    case MESA_SHADER_TASK:
-      config_out->rsrc1 |=
+      config->rsrc1 |=
          S_00B848_MEM_ORDERED(pdevice->rad_info.gfx_level >= GFX10) | 
S_00B848_WGP_MODE(wgp_mode);
-      config_out->rsrc2 |= S_00B84C_TGID_X_EN(info->cs.uses_block_id[0]) |
-                           S_00B84C_TGID_Y_EN(info->cs.uses_block_id[1]) |
-                           S_00B84C_TGID_Z_EN(info->cs.uses_block_id[2]) |
-                           S_00B84C_TIDIG_COMP_CNT(info->cs.uses_thread_id[2]  
 ? 2
-                                                   : 
info->cs.uses_thread_id[1] ? 1
-                                                                               
 : 0) |
-                           
S_00B84C_TG_SIZE_EN(info->cs.uses_local_invocation_idx) |
-                           S_00B84C_LDS_SIZE(config_in->lds_size) | 
S_00B84C_EXCP_EN(excp_en);
-      config_out->rsrc3 |= S_00B8A0_SHARED_VGPR_CNT(num_shared_vgpr_blocks);
+      config->rsrc2 |= S_00B84C_TGID_X_EN(info->cs.uses_block_id[0]) |
+                       S_00B84C_TGID_Y_EN(info->cs.uses_block_id[1]) |
+                       S_00B84C_TGID_Z_EN(info->cs.uses_block_id[2]) |
+                       S_00B84C_TIDIG_COMP_CNT(info->cs.uses_thread_id[2]   ? 2
+                                               : info->cs.uses_thread_id[1] ? 1
+                                                                            : 
0) |
+                       S_00B84C_TG_SIZE_EN(info->cs.uses_local_invocation_idx) 
|
+                       S_00B84C_LDS_SIZE(config->lds_size) | 
S_00B84C_EXCP_EN(excp_en);
+      config->rsrc3 |= S_00B8A0_SHARED_VGPR_CNT(num_shared_vgpr_blocks);
 
       break;
    default:
@@ -2049,11 +2043,10 @@ radv_postprocess_config(const struct radv_device 
*device, const struct ac_shader
        * happened on VanGogh) Let's disable it on all chips that
        * disable exactly 1 CU per SA for GS.
        */
-      config_out->rsrc1 |=
-         S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt) | 
S_00B228_WGP_MODE(wgp_mode);
-      config_out->rsrc2 |= S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) |
-                           S_00B22C_LDS_SIZE(config_in->lds_size) |
-                           S_00B22C_OC_LDS_EN(es_stage == 
MESA_SHADER_TESS_EVAL);
+      config->rsrc1 |= S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt) | 
S_00B228_WGP_MODE(wgp_mode);
+      config->rsrc2 |= S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) |
+                       S_00B22C_LDS_SIZE(config->lds_size) |
+                       S_00B22C_OC_LDS_EN(es_stage == MESA_SHADER_TESS_EVAL);
    } else if (pdevice->rad_info.gfx_level >= GFX9 && stage == 
MESA_SHADER_GEOMETRY) {
       unsigned es_type = info->gs.es_type;
       unsigned gs_vgpr_comp_cnt, es_vgpr_comp_cnt;
@@ -2084,14 +2077,13 @@ radv_postprocess_config(const struct radv_device 
*device, const struct ac_shader
          gs_vgpr_comp_cnt = 0; /* VGPR0 contains offsets 0, 1 */
       }
 
-      config_out->rsrc1 |=
-         S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt) | 
S_00B228_WGP_MODE(wgp_mode);
-      config_out->rsrc2 |= S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) |
-                           S_00B22C_OC_LDS_EN(es_type == 
MESA_SHADER_TESS_EVAL);
+      config->rsrc1 |= S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt) | 
S_00B228_WGP_MODE(wgp_mode);
+      config->rsrc2 |= S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) |
+                       S_00B22C_OC_LDS_EN(es_type == MESA_SHADER_TESS_EVAL);
    } else if (pdevice->rad_info.gfx_level >= GFX9 && stage == 
MESA_SHADER_TESS_CTRL) {
-      config_out->rsrc1 |= S_00B428_LS_VGPR_COMP_CNT(vgpr_comp_cnt);
+      config->rsrc1 |= S_00B428_LS_VGPR_COMP_CNT(vgpr_comp_cnt);
    } else {
-      config_out->rsrc1 |= S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt);
+      config->rsrc1 |= S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt);
    }
 }
 
@@ -2144,7 +2136,7 @@ static bool
 radv_postprocess_binary_config(struct radv_device *device, struct 
radv_shader_binary *binary,
                                const struct radv_shader_args *args)
 {
-   struct ac_shader_config config = {0};
+   struct ac_shader_config *config = &binary->config;
 
    if (binary->type == RADV_BINARY_TYPE_RTLD) {
 #if !defined(USE_LIBELF)
@@ -2156,29 +2148,26 @@ radv_postprocess_binary_config(struct radv_device 
*device, struct radv_shader_bi
          return false;
       }
 
-      if (!ac_rtld_read_config(&device->physical_device->rad_info, 
&rtld_binary, &config)) {
+      if (!ac_rtld_read_config(&device->physical_device->rad_info, 
&rtld_binary, config)) {
          ac_rtld_close(&rtld_binary);
          return false;
       }
 
       if (rtld_binary.lds_size > 0) {
          unsigned encode_granularity = 
device->physical_device->rad_info.lds_encode_granularity;
-         config.lds_size = DIV_ROUND_UP(rtld_binary.lds_size, 
encode_granularity);
+         config->lds_size = DIV_ROUND_UP(rtld_binary.lds_size, 
encode_granularity);
       }
-      if (!config.lds_size && binary->stage == MESA_SHADER_TESS_CTRL) {
+      if (!config->lds_size && binary->stage == MESA_SHADER_TESS_CTRL) {
          /* This is used for reporting LDS statistics */
-         config.lds_size = binary->info.tcs.num_lds_blocks;
+         config->lds_size = binary->info.tcs.num_lds_blocks;
       }
 
-      assert(!binary->info.has_ngg_culling || config.lds_size);
+      assert(!binary->info.has_ngg_culling || config->lds_size);
       ac_rtld_close(&rtld_binary);
 #endif
-   } else {
-      assert(binary->type == RADV_BINARY_TYPE_LEGACY);
-      config = ((struct radv_shader_binary_legacy *)binary)->base.config;
    }
 
-   radv_postprocess_config(device, &config, &binary->info, binary->stage, 
args, &binary->config);
+   radv_postprocess_config(device, config, &binary->info, binary->stage, args);
    return true;
 }
 

Reply via email to