Module: Mesa Branch: main Commit: 1161f22c2790a1c7617e02e66bdf56bdf39fee2d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1161f22c2790a1c7617e02e66bdf56bdf39fee2d
Author: Tatsuyuki Ishi <ishitatsuy...@gmail.com> Date: Thu Dec 14 18:22:38 2023 +0900 radv: Move up radv_get_max_waves, radv_get_max_scratch_waves. To avoid forward declaration. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26692> --- src/amd/vulkan/radv_shader.c | 106 +++++++++++++++++++++---------------------- 1 file changed, 53 insertions(+), 53 deletions(-) diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index b1ddde32e88..9b4b6fcef43 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -2049,6 +2049,59 @@ radv_shader_upload(struct radv_device *device, struct radv_shader *shader, const return true; } +unsigned +radv_get_max_waves(const struct radv_device *device, struct radv_shader *shader, gl_shader_stage stage) +{ + const struct radeon_info *info = &device->physical_device->rad_info; + const enum amd_gfx_level gfx_level = info->gfx_level; + const uint8_t wave_size = shader->info.wave_size; + const struct ac_shader_config *conf = &shader->config; + unsigned max_simd_waves = info->max_waves_per_simd; + unsigned lds_per_wave = 0; + + if (stage == MESA_SHADER_FRAGMENT) { + lds_per_wave = conf->lds_size * info->lds_encode_granularity + shader->info.ps.num_interp * 48; + lds_per_wave = align(lds_per_wave, info->lds_alloc_granularity); + } else if (stage == MESA_SHADER_COMPUTE || stage == MESA_SHADER_TASK) { + unsigned max_workgroup_size = shader->info.workgroup_size; + lds_per_wave = align(conf->lds_size * info->lds_encode_granularity, info->lds_alloc_granularity); + lds_per_wave /= DIV_ROUND_UP(max_workgroup_size, wave_size); + } + + if (conf->num_sgprs && gfx_level < GFX10) { + unsigned sgprs = align(conf->num_sgprs, gfx_level >= GFX8 ? 16 : 8); + max_simd_waves = MIN2(max_simd_waves, info->num_physical_sgprs_per_simd / sgprs); + } + + if (conf->num_vgprs) { + unsigned physical_vgprs = info->num_physical_wave64_vgprs_per_simd * (64 / wave_size); + unsigned vgprs = align(conf->num_vgprs, wave_size == 32 ? 8 : 4); + if (gfx_level >= GFX10_3) { + unsigned real_vgpr_gran = info->num_physical_wave64_vgprs_per_simd / 64; + vgprs = util_align_npot(vgprs, real_vgpr_gran * (wave_size == 32 ? 2 : 1)); + } + max_simd_waves = MIN2(max_simd_waves, physical_vgprs / vgprs); + } + + unsigned simd_per_workgroup = info->num_simd_per_compute_unit; + if (gfx_level >= GFX10) + simd_per_workgroup *= 2; /* like lds_size_per_workgroup, assume WGP on GFX10+ */ + + unsigned max_lds_per_simd = info->lds_size_per_workgroup / simd_per_workgroup; + if (lds_per_wave) + max_simd_waves = MIN2(max_simd_waves, DIV_ROUND_UP(max_lds_per_simd, lds_per_wave)); + + return gfx_level >= GFX10 ? max_simd_waves * (wave_size / 32) : max_simd_waves; +} + +unsigned +radv_get_max_scratch_waves(const struct radv_device *device, struct radv_shader *shader) +{ + const unsigned num_cu = device->physical_device->rad_info.num_cu; + + return MIN2(device->scratch_waves, 4 * num_cu * radv_get_max_waves(device, shader, shader->info.stage)); +} + VkResult radv_shader_create_uncached(struct radv_device *device, const struct radv_shader_binary *binary, bool replayable, struct radv_serialized_shader_arena_block *replay_block, struct radv_shader **out_shader) @@ -2877,59 +2930,6 @@ radv_get_shader_name(const struct radv_shader_info *info, gl_shader_stage stage) }; } -unsigned -radv_get_max_waves(const struct radv_device *device, struct radv_shader *shader, gl_shader_stage stage) -{ - const struct radeon_info *info = &device->physical_device->rad_info; - const enum amd_gfx_level gfx_level = info->gfx_level; - const uint8_t wave_size = shader->info.wave_size; - const struct ac_shader_config *conf = &shader->config; - unsigned max_simd_waves = info->max_waves_per_simd; - unsigned lds_per_wave = 0; - - if (stage == MESA_SHADER_FRAGMENT) { - lds_per_wave = conf->lds_size * info->lds_encode_granularity + shader->info.ps.num_interp * 48; - lds_per_wave = align(lds_per_wave, info->lds_alloc_granularity); - } else if (stage == MESA_SHADER_COMPUTE || stage == MESA_SHADER_TASK) { - unsigned max_workgroup_size = shader->info.workgroup_size; - lds_per_wave = align(conf->lds_size * info->lds_encode_granularity, info->lds_alloc_granularity); - lds_per_wave /= DIV_ROUND_UP(max_workgroup_size, wave_size); - } - - if (conf->num_sgprs && gfx_level < GFX10) { - unsigned sgprs = align(conf->num_sgprs, gfx_level >= GFX8 ? 16 : 8); - max_simd_waves = MIN2(max_simd_waves, info->num_physical_sgprs_per_simd / sgprs); - } - - if (conf->num_vgprs) { - unsigned physical_vgprs = info->num_physical_wave64_vgprs_per_simd * (64 / wave_size); - unsigned vgprs = align(conf->num_vgprs, wave_size == 32 ? 8 : 4); - if (gfx_level >= GFX10_3) { - unsigned real_vgpr_gran = info->num_physical_wave64_vgprs_per_simd / 64; - vgprs = util_align_npot(vgprs, real_vgpr_gran * (wave_size == 32 ? 2 : 1)); - } - max_simd_waves = MIN2(max_simd_waves, physical_vgprs / vgprs); - } - - unsigned simd_per_workgroup = info->num_simd_per_compute_unit; - if (gfx_level >= GFX10) - simd_per_workgroup *= 2; /* like lds_size_per_workgroup, assume WGP on GFX10+ */ - - unsigned max_lds_per_simd = info->lds_size_per_workgroup / simd_per_workgroup; - if (lds_per_wave) - max_simd_waves = MIN2(max_simd_waves, DIV_ROUND_UP(max_lds_per_simd, lds_per_wave)); - - return gfx_level >= GFX10 ? max_simd_waves * (wave_size / 32) : max_simd_waves; -} - -unsigned -radv_get_max_scratch_waves(const struct radv_device *device, struct radv_shader *shader) -{ - const unsigned num_cu = device->physical_device->rad_info.num_cu; - - return MIN2(device->scratch_waves, 4 * num_cu * radv_get_max_waves(device, shader, shader->info.stage)); -} - unsigned radv_compute_spi_ps_input(const struct radv_pipeline_key *pipeline_key, const struct radv_shader_info *info) {