On 25 October 2017 at 12:46, Samuel Pitoiset <samuel.pitoi...@gmail.com> wrote:
> I have something similar on my local tree (started on monday). > > Though, I don't like the way we expose the number of VGPRS/SGPRS because > we can't really figure out the number of spilled ones. My assumption was that if we've spilled then we've used all available registers, so if numUsed{V,S}gprs is greater than the number available, then you'd know that the number spilled is the difference between the two. Can we have spilling when num_{v,s}gprs is less than the number available? Alex > > > On 10/25/2017 01:18 PM, Alex Smith wrote: > >> This allows an app to query shader statistics and get a disassembly of >> a shader. RenderDoc git has support for it, so this allows you to view >> shader disassembly from a capture. >> >> When this extension is enabled on a device (or when tracing), we now >> disable pipeline caching, since we don't get the shader debug info when >> we retrieve cached shaders. >> >> Signed-off-by: Alex Smith <asm...@feralinteractive.com> >> --- >> src/amd/vulkan/radv_device.c | 9 ++ >> src/amd/vulkan/radv_extensions.py | 1 + >> src/amd/vulkan/radv_pipeline.c | 2 +- >> src/amd/vulkan/radv_pipeline_cache.c | 11 ++- >> src/amd/vulkan/radv_private.h | 3 + >> src/amd/vulkan/radv_shader.c | 163 >> ++++++++++++++++++++++++++++------- >> 6 files changed, 154 insertions(+), 35 deletions(-) >> >> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c >> index c4e25222ea..5603551680 100644 >> --- a/src/amd/vulkan/radv_device.c >> +++ b/src/amd/vulkan/radv_device.c >> @@ -943,10 +943,15 @@ VkResult radv_CreateDevice( >> VkResult result; >> struct radv_device *device; >> + bool keep_shader_info = false; >> + >> for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; >> i++) { >> const char *ext_name = pCreateInfo->ppEnabledExtensio >> nNames[i]; >> if >> (!radv_physical_device_extension_supported(physical_device, >> ext_name)) >> return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT); >> + >> + if (strcmp(ext_name, VK_AMD_SHADER_INFO_EXTENSION_NAME) >> == 0) >> + keep_shader_info = true; >> } >> /* Check enabled features */ >> @@ -1040,10 +1045,14 @@ VkResult radv_CreateDevice( >> device->physical_device->rad_info.max_se >= 2; >> if (getenv("RADV_TRACE_FILE")) { >> + keep_shader_info = true; >> + >> if (!radv_init_trace(device)) >> goto fail; >> } >> + device->keep_shader_info = keep_shader_info; >> + >> result = radv_device_init_meta(device); >> if (result != VK_SUCCESS) >> goto fail; >> diff --git a/src/amd/vulkan/radv_extensions.py >> b/src/amd/vulkan/radv_extensions.py >> index dfeb2880fc..eeb679d65a 100644 >> --- a/src/amd/vulkan/radv_extensions.py >> +++ b/src/amd/vulkan/radv_extensions.py >> @@ -81,6 +81,7 @@ EXTENSIONS = [ >> Extension('VK_EXT_global_priority', 1, >> 'device->rad_info.has_ctx_priority'), >> Extension('VK_AMD_draw_indirect_count', 1, True), >> Extension('VK_AMD_rasterization_order', 1, >> 'device->rad_info.chip_class >= VI && device->rad_info.max_se >= 2'), >> + Extension('VK_AMD_shader_info', 1, True), >> ] >> class VkVersion: >> diff --git a/src/amd/vulkan/radv_pipeline.c >> b/src/amd/vulkan/radv_pipeline.c >> index d6b33a5327..2df03a83cf 100644 >> --- a/src/amd/vulkan/radv_pipeline.c >> +++ b/src/amd/vulkan/radv_pipeline.c >> @@ -1874,7 +1874,7 @@ void radv_create_shaders(struct radv_pipeline >> *pipeline, >> if (device->instance->debug_flags & >> RADV_DEBUG_DUMP_SHADERS) >> nir_print_shader(nir[i], stderr); >> - if (!pipeline->device->trace_bo) >> + if (!pipeline->device->keep_shader_info) >> ralloc_free(nir[i]); >> } >> } >> diff --git a/src/amd/vulkan/radv_pipeline_cache.c >> b/src/amd/vulkan/radv_pipeline_cache.c >> index 9ba9a3b61b..46198799a7 100644 >> --- a/src/amd/vulkan/radv_pipeline_cache.c >> +++ b/src/amd/vulkan/radv_pipeline_cache.c >> @@ -62,9 +62,11 @@ radv_pipeline_cache_init(struct radv_pipeline_cache >> *cache, >> cache->hash_table = malloc(byte_size); >> /* We don't consider allocation failure fatal, we just start with >> a 0-sized >> - * cache. */ >> + * cache. Disable caching when we want to keep shader debug info, >> since >> + * we don't get the debug info on cached shaders. */ >> if (cache->hash_table == NULL || >> - (device->instance->debug_flags & RADV_DEBUG_NO_CACHE)) >> + (device->instance->debug_flags & RADV_DEBUG_NO_CACHE) || >> + device->keep_shader_info) >> cache->table_size = 0; >> else >> memset(cache->hash_table, 0, byte_size); >> @@ -186,8 +188,11 @@ radv_create_shader_variants_from_pipeline_cache(struct >> radv_device *device, >> entry = radv_pipeline_cache_search_unlocked(cache, sha1); >> if (!entry) { >> + /* Again, don't cache when we want debug info, since this >> isn't >> + * present in the cache. */ >> if (!device->physical_device->disk_cache || >> - (device->instance->debug_flags & >> RADV_DEBUG_NO_CACHE)) { >> + (device->instance->debug_flags & >> RADV_DEBUG_NO_CACHE) || >> + device->keep_shader_info) { >> pthread_mutex_unlock(&cache->mutex); >> return false; >> } >> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private. >> h >> index a4e52b2530..169df5f37b 100644 >> --- a/src/amd/vulkan/radv_private.h >> +++ b/src/amd/vulkan/radv_private.h >> @@ -552,6 +552,9 @@ struct radv_device { >> struct radeon_winsys_bo *trace_bo; >> uint32_t *trace_id_ptr; >> + /* Whether to keep shader debug info, for tracing or >> VK_AMD_shader_info */ >> + bool keep_shader_info; >> + >> struct radv_physical_device *physical_device; >> /* Backup in-memory cache to be used if the app doesn't provide >> one */ >> diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c >> index 5903917068..7f2f0fd750 100644 >> --- a/src/amd/vulkan/radv_shader.c >> +++ b/src/amd/vulkan/radv_shader.c >> @@ -46,6 +46,8 @@ >> #include "util/debug.h" >> #include "ac_exp_param.h" >> +#include "util/string_buffer.h" >> + >> static const struct nir_shader_compiler_options nir_options = { >> .vertex_id_zero_based = true, >> .lower_scmp = true, >> @@ -471,7 +473,7 @@ shader_variant_create(struct radv_device *device, >> free(binary.relocs); >> variant->ref_count = 1; >> - if (device->trace_bo) { >> + if (device->keep_shader_info) { >> variant->disasm_string = binary.disasm_string; >> if (!gs_copy_shader && !module->nir) { >> variant->nir = *shaders; >> @@ -593,11 +595,20 @@ radv_get_shader_name(struct radv_shader_variant >> *var, gl_shader_stage stage) >> }; >> } >> -void >> -radv_shader_dump_stats(struct radv_device *device, >> - struct radv_shader_variant *variant, >> - gl_shader_stage stage, >> - FILE *file) >> +static uint32_t >> +get_total_sgprs(struct radv_device *device) >> +{ >> + if (device->physical_device->rad_info.chip_class >= VI) >> + return 800; >> + else >> + return 512; >> +} >> + >> +static void >> +generate_shader_stats(struct radv_device *device, >> + struct radv_shader_variant *variant, >> + gl_shader_stage stage, >> + struct _mesa_string_buffer *buf) >> { >> unsigned lds_increment = device->physical_device->rad_info.chip_class >> >= CIK ? 512 : 256; >> struct ac_shader_config *conf; >> @@ -623,12 +634,8 @@ radv_shader_dump_stats(struct radv_device *device, >> lds_increment); >> } >> - if (conf->num_sgprs) { >> - if (device->physical_device->rad_info.chip_class >= VI) >> - max_simd_waves = MIN2(max_simd_waves, 800 / >> conf->num_sgprs); >> - else >> - max_simd_waves = MIN2(max_simd_waves, 512 / >> conf->num_sgprs); >> - } >> + if (conf->num_sgprs) >> + max_simd_waves = MIN2(max_simd_waves, >> get_total_sgprs(device) / conf->num_sgprs); >> if (conf->num_vgprs) >> max_simd_waves = MIN2(max_simd_waves, 256 / >> conf->num_vgprs); >> @@ -639,27 +646,121 @@ radv_shader_dump_stats(struct radv_device *device, >> if (lds_per_wave) >> max_simd_waves = MIN2(max_simd_waves, 16384 / >> lds_per_wave); >> + if (stage == MESA_SHADER_FRAGMENT) { >> + _mesa_string_buffer_printf(buf, "*** SHADER CONFIG ***\n" >> + "SPI_PS_INPUT_ADDR = 0x%04x\n" >> + "SPI_PS_INPUT_ENA = 0x%04x\n", >> + conf->spi_ps_input_addr, >> conf->spi_ps_input_ena); >> + } >> + >> + _mesa_string_buffer_printf(buf, "*** SHADER STATS ***\n" >> + "SGPRS: %d\n" >> + "VGPRS: %d\n" >> + "Spilled SGPRs: %d\n" >> + "Spilled VGPRs: %d\n" >> + "Code Size: %d bytes\n" >> + "LDS: %d blocks\n" >> + "Scratch: %d bytes per wave\n" >> + "Max Waves: %d\n" >> + "********************\n\n\n", >> + conf->num_sgprs, conf->num_vgprs, >> + conf->spilled_sgprs, >> conf->spilled_vgprs, variant->code_size, >> + conf->lds_size, >> conf->scratch_bytes_per_wave, >> + max_simd_waves); >> +} >> + >> +void >> +radv_shader_dump_stats(struct radv_device *device, >> + struct radv_shader_variant *variant, >> + gl_shader_stage stage, >> + FILE *file) >> +{ >> + struct _mesa_string_buffer *buf = _mesa_string_buffer_create(NULL, >> 256); >> + >> + generate_shader_stats(device, variant, stage, buf); >> + >> fprintf(file, "\n%s:\n", radv_get_shader_name(variant, stage)); >> + fprintf(file, buf->buf); >> - if (stage == MESA_SHADER_FRAGMENT) { >> - fprintf(file, "*** SHADER CONFIG ***\n" >> - "SPI_PS_INPUT_ADDR = 0x%04x\n" >> - "SPI_PS_INPUT_ENA = 0x%04x\n", >> - conf->spi_ps_input_addr, conf->spi_ps_input_ena); >> + _mesa_string_buffer_destroy(buf); >> +} >> + >> +VkResult >> +radv_GetShaderInfoAMD(VkDevice _device, >> + VkPipeline _pipeline, >> + VkShaderStageFlagBits shaderStage, >> + VkShaderInfoTypeAMD infoType, >> + size_t* pInfoSize, >> + void* pInfo) >> +{ >> + RADV_FROM_HANDLE(radv_device, device, _device); >> + RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline); >> + gl_shader_stage stage = vk_to_mesa_shader_stage(shaderStage); >> + struct radv_shader_variant *variant = pipeline->shaders[stage]; >> + struct _mesa_string_buffer *buf; >> + VkResult result = VK_SUCCESS; >> + >> + /* Spec doesn't indicate what to do if the stage is invalid, so >> just >> + * return no info for this. */ >> + if (!variant) >> + return VK_ERROR_FEATURE_NOT_PRESENT; >> + >> + switch (infoType) { >> + case VK_SHADER_INFO_TYPE_STATISTICS_AMD: >> + if (!pInfo) { >> + *pInfoSize = sizeof(VkShaderStatisticsInfoAMD); >> + } else { >> + struct ac_shader_config *conf = &variant->config; >> + >> + VkShaderStatisticsInfoAMD statistics = {}; >> + statistics.shaderStageMask = shaderStage; >> + statistics.resourceUsage.numUsedVgprs = >> conf->num_vgprs + conf->spilled_vgprs; >> + statistics.resourceUsage.numUsedSgprs = >> conf->num_sgprs + conf->spilled_sgprs; >> + statistics.resourceUsage.ldsSizePerLocalWorkGroup >> = 16384; >> + statistics.resourceUsage.ldsUsageSizeInBytes = >> conf->lds_size; >> + statistics.resourceUsage.scratchMemUsageInBytes >> = conf->scratch_bytes_per_wave; >> + statistics.numPhysicalVgprs = >> statistics.numAvailableVgprs = 256; >> + statistics.numPhysicalSgprs = >> statistics.numAvailableSgprs = get_total_sgprs(device); >> + statistics.computeWorkGroupSize[0] = >> variant->nir->info.cs.local_size[0]; >> + statistics.computeWorkGroupSize[1] = >> variant->nir->info.cs.local_size[1]; >> + statistics.computeWorkGroupSize[2] = >> variant->nir->info.cs.local_size[2]; >> + >> + size_t size = *pInfoSize; >> + *pInfoSize = sizeof(statistics); >> + >> + memcpy(pInfo, &statistics, MIN2(size, >> *pInfoSize)); >> + >> + if (size < *pInfoSize) >> + result = VK_INCOMPLETE; >> + } >> + >> + break; >> + case VK_SHADER_INFO_TYPE_DISASSEMBLY_AMD: >> + buf = _mesa_string_buffer_create(NULL, 1024); >> + >> + _mesa_string_buffer_printf(buf, "%s:\n", >> radv_get_shader_name(variant, stage)); >> + _mesa_string_buffer_printf(buf, "%s\n\n", >> variant->disasm_string); >> + generate_shader_stats(device, variant, stage, buf); >> + >> + if (!pInfo) { >> + *pInfoSize = buf->length; >> + } else { >> + size_t size = *pInfoSize; >> + *pInfoSize = buf->length; >> + >> + memcpy(pInfo, buf->buf, MIN2(size, buf->length)); >> + >> + if (size < buf->length) >> + result = VK_INCOMPLETE; >> + } >> + >> + _mesa_string_buffer_destroy(buf); >> + break; >> + default: >> + /* VK_SHADER_INFO_TYPE_BINARY_AMD unimplemented for now. >> */ >> + result = VK_ERROR_FEATURE_NOT_PRESENT; >> + break; >> } >> - fprintf(file, "*** SHADER STATS ***\n" >> - "SGPRS: %d\n" >> - "VGPRS: %d\n" >> - "Spilled SGPRs: %d\n" >> - "Spilled VGPRs: %d\n" >> - "Code Size: %d bytes\n" >> - "LDS: %d blocks\n" >> - "Scratch: %d bytes per wave\n" >> - "Max Waves: %d\n" >> - "********************\n\n\n", >> - conf->num_sgprs, conf->num_vgprs, >> - conf->spilled_sgprs, conf->spilled_vgprs, >> variant->code_size, >> - conf->lds_size, conf->scratch_bytes_per_wave, >> - max_simd_waves); >> + return result; >> } >> >>
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev