Module: Mesa Branch: main Commit: 360f7c5d64b3f592a8d6682f9a4fd068c8594f12 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=360f7c5d64b3f592a8d6682f9a4fd068c8594f12
Author: Connor Abbott <[email protected]> Date: Thu Jan 27 18:33:55 2022 +0100 tu: Initial link-time optimizations This is mostly taken from radv, and cleaned up a bit: don't explicitly list every stage at the beginning, and name the shaders "producer" and "consumer" to reduce confusion. I also stripped out a lot of other stuff to get to the bare minimum of calling nir_link_opt_varyings, nir_remove_unused_varyings, and nir_compact_varyings and then cleaning up the fallout. In the future we may want to temporarily scalarize I/O like radv does, and add back a few things like the psize optimization. In the meantime this already provides a lot of benefit. Results from the radv fossil_db with some apps not compilable by turnip removed: Totals: MaxWaves: 1637288 -> 1668200 (+1.89%); split: +1.89%, -0.00% Instrs: 54620287 -> 54114442 (-0.93%); split: -0.98%, +0.05% CodeSize: 92235646 -> 91277584 (-1.04%); split: -1.07%, +0.03% NOPs: 11176775 -> 11185206 (+0.08%); split: -0.63%, +0.71% Full: 1689271 -> 1657175 (-1.90%); split: -1.92%, +0.02% (ss): 1318763 -> 1317757 (-0.08%); split: -1.40%, +1.32% (sy): 618795 -> 617724 (-0.17%); split: -0.70%, +0.53% (ss)-stall: 3496370 -> 3470116 (-0.75%); split: -1.37%, +0.62% (sy)-stall: 23512954 -> 23511164 (-0.01%); split: -1.04%, +1.03% STPs: 27557 -> 27461 (-0.35%) LDPs: 22948 -> 22804 (-0.63%) Cat0: 11823765 -> 11829681 (+0.05%); split: -0.62%, +0.67% Cat1: 3120042 -> 2991831 (-4.11%); split: -4.43%, +0.32% Cat2: 28605309 -> 28324829 (-0.98%); split: -0.98%, +0.00% Cat3: 7334628 -> 7252342 (-1.12%); split: -1.12%, +0.00% Cat4: 1216514 -> 1204894 (-0.96%) Cat5: 863976 -> 861926 (-0.24%) Cat6: 1648571 -> 1641457 (-0.43%) Totals from 23575 (16.16% of 145856) affected shaders: MaxWaves: 258806 -> 289718 (+11.94%); split: +11.94%, -0.00% Instrs: 7571190 -> 7065345 (-6.68%); split: -7.04%, +0.36% CodeSize: 13864308 -> 12906246 (-6.91%); split: -7.09%, +0.18% NOPs: 959185 -> 967616 (+0.88%); split: -7.35%, +8.23% Full: 313335 -> 281239 (-10.24%); split: -10.36%, +0.11% (ss): 154628 -> 153622 (-0.65%); split: -11.90%, +11.25% (sy): 69758 -> 68687 (-1.54%); split: -6.21%, +4.67% (ss)-stall: 322002 -> 295748 (-8.15%); split: -14.92%, +6.76% (sy)-stall: 3270366 -> 3268576 (-0.05%); split: -7.45%, +7.40% STPs: 3624 -> 3528 (-2.65%) LDPs: 1074 -> 930 (-13.41%) Cat0: 1022684 -> 1028600 (+0.58%); split: -7.13%, +7.71% Cat1: 531102 -> 402891 (-24.14%); split: -26.04%, +1.90% Cat2: 4090309 -> 3809829 (-6.86%); split: -6.86%, +0.00% Cat3: 1449686 -> 1367400 (-5.68%); split: -5.69%, +0.01% Cat4: 103543 -> 91923 (-11.22%) Cat5: 57441 -> 55391 (-3.57%) Cat6: 316096 -> 308982 (-2.25%) Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14767> --- src/freedreno/vulkan/tu_pipeline.c | 43 +++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c index fa6bd9157c2..a0e18991596 100644 --- a/src/freedreno/vulkan/tu_pipeline.c +++ b/src/freedreno/vulkan/tu_pipeline.c @@ -2362,6 +2362,47 @@ tu_append_executable(struct tu_pipeline *pipeline, struct ir3_shader_variant *va util_dynarray_append(&pipeline->executables, struct tu_pipeline_executable, exe); } +static void +tu_link_shaders(struct tu_pipeline_builder *builder, + nir_shader **shaders, unsigned shaders_count) +{ + nir_shader *consumer = NULL; + for (gl_shader_stage stage = shaders_count - 1; + stage >= MESA_SHADER_VERTEX; stage--) { + if (!shaders[stage]) + continue; + + nir_shader *producer = shaders[stage]; + if (!consumer) { + consumer = producer; + continue; + } + + if (nir_link_opt_varyings(producer, consumer)) { + NIR_PASS_V(consumer, nir_opt_constant_folding); + NIR_PASS_V(consumer, nir_opt_algebraic); + NIR_PASS_V(consumer, nir_opt_dce); + } + + NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_out, NULL); + NIR_PASS_V(consumer, nir_remove_dead_variables, nir_var_shader_in, NULL); + + bool progress = nir_remove_unused_varyings(producer, consumer); + + nir_compact_varyings(producer, consumer, true); + if (progress) { + if (nir_lower_global_vars_to_local(producer)) { + /* Remove dead writes, which can remove input loads */ + NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_temp, NULL); + NIR_PASS_V(producer, nir_opt_dce); + } + nir_lower_global_vars_to_local(consumer); + } + + consumer = producer; + } +} + static VkResult tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder, struct tu_pipeline *pipeline) @@ -2417,7 +2458,7 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder, } } - /* TODO do intra-stage linking here */ + tu_link_shaders(builder, nir, ARRAY_SIZE(nir)); uint32_t desc_sets = 0; for (gl_shader_stage stage = MESA_SHADER_VERTEX;
